\documentclass[a4paper,preprint,12pt]{elsarticle}
\usepackage{natbib}
%\usepackage{authblk}
\usepackage{lscape}
\usepackage{pdflscape}
\usepackage[latin1]{inputenc} 
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{caption}
\usepackage{multirow}
\usepackage{xcolor}
\usepackage{url}
\usepackage[pdftex, plainpages = false, pdfpagelabels,
                pdfpagelayout = useoutlines,
                 bookmarks,
                 bookmarksopen = true,
                 bookmarksnumbered = true,
                 breaklinks = true,
                 linktocpage,
                 pagebackref,
                 colorlinks = true,
                 linkcolor = blue,
                 urlcolor  = blue,
                 citecolor = blue,
                 anchorcolor = green,
                 hyperindex = true,
                 hyperfigures]{hyperref}

\begin{document}

\title{The Problems and Solutions of Predicting Participation \\ in Energy Efficiency Programs}
\author{Alexander L. Davis\corref{cor1}} 
\author{Tamar Krishnamurti}

\address{Department of Engineering and Public Policy \\ Carnegie Mellon University \\ 5000 Forbes Ave., Pittsburgh, PA 15213}

\cortext[cor1]{Corresponding Author.  Center for Climate and Energy Decision-Making. Phone: 1-412-216-2040. Email: alexander [dot] l [dot] davis1 [at] gmail [dot] com}

\begin{abstract}
This paper discusses volunteer bias in energy efficiency pilot studies.  We briefly evaluate the bias in existing trials. We then present a study showing how volunteer bias can be corrected when not avoidable, using Classification Trees and a questionnaire that is at most five questions long and takes only one minute to complete.  This paper should allow researchers who employ field studies involving human behavior to be well equipped to deal with volunteer bias.
\end{abstract}

\begin{keyword}
Volunteer bias \sep Field Studies \sep Prediction \sep Human Behavior
\end{keyword}

\renewcommand{\topfraction}{1.0}
\maketitle

<<preamble,echo=false,results=hide>>=
# Restore whatever objects were saved in the file "vmoo" in the working directory.
# NOTE(review): `vmoo` is assigned from the CSV on the very next line, so if that file
# stores an object called vmoo it is overwritten here -- confirm load() is still needed.
load("vmoo")
# Read the two CSV data files, treating empty fields as NA.
# NOTE(review): `na.string` only works through partial matching of read.csv's `na.strings` argument.
vmoo<-read.csv("vmoo-open.removed.csv",na.string="")
vmoo2<-read.csv("vmoo2-full.csv",na.string="")
#install.packages("xtable")
#install.packages("ggplot2")
#install.packages("psychometric")
#install.packages("rpart")
 #install.packages("e1071")
#install.packages("arules")
#install.packages("mclust")
#install.packages("np")
#install.packages("kernlab")
#install.packages("arm")
#install.packages("RWeka", dependencies = TRUE)
#install.packages("MASS")
#install.packages("Matrix")
#install.packages("glmnet")
#install.packages("bnlearn")
#install.packages("FNN")
#install.packages("ada")
#install.packages("igraph")
#install.packages("mclust")
#install.packages("psych")
#install.packages("gdata")
#install.packages("rpart.plot")
#source("http://bioconductor.org/biocLite.R")
#biocLite("graph")
#library(RWeka)
library(rpart.plot)
library(gdata)
library(psych)
library(Matrix)
library(glmnet)
library(MASS)
library(arm)
library(kernlab)
library(np)
library(graph)
library(bnlearn)
library(boot)
library(FNN)
library(ada)
library(igraph)
library(mclust)
library(arules)
library(xtable)
library(psychometric)
library(ggplot2)
library(rpart)
library(e1071)
# Print numbers with 2 significant digits; scipen=2 biases printing toward fixed notation.
options(digits=2,scipen=2)
# Number of bootstrap resamples drawn by every `for(i in 1:N)` loop in the exp1 chunk.
N<-5
# Standardized alpha from inter-item correlations: k*rbar/(1+(k-1)*rbar),
# where rbar is the mean of the correlations in x.
#   x: numeric vector of pairwise inter-item correlations.
#   k: number of items in the scale.  Defaults to length(x) so that existing
#      one-argument calls return exactly what they did before.
# NOTE(review): length(x) is the number of *correlations*, which equals the number
# of items only for a 3-item scale (4 items give 6 correlations, 6 items give 15).
# Callers that want the conventional coefficient should pass k explicitly.
alpha<-function(x,k=length(x)){
  rbar<-mean(x)
  k*rbar/(1+(k-1)*rbar)}
# Format numbers with two decimals and no leading zero (e.g. 0.5 -> ".50"),
# the usual style for reporting correlations and proportions.
#   t: numeric vector.
# Returns (invisibly, as before) a character vector of the same length.
# Fixes relative to the previous version:
#   * negatives were rendered with a stray space ("- .50") and values at or below -1
#     lost their integer part;
#   * exactly 1 (and anything rounding to 1.00) was rendered as ".00";
#   * tiny negative values were rendered as "- 01".
# Preserved: magnitudes below 0.001 are displayed as ".01".
# NOTE(review): that floor looks arbitrary (0.004 still prints ".00") -- confirm intent.
ztrunc<-function(t){
  q<-t
  mag<-ifelse(abs(q)<0.001,.01,abs(q))
  out<-sprintf("%.2f",mag)
  # Strip the leading zero only when the formatted magnitude really starts with "0."
  out<-sub("^0\\.",".",out)
  out<-ifelse(q<0,paste("-",out,sep=""),out)
  invisible(out)
}
@

<<exp1,results=hide,fig=false,echo=false>>=
#Data Preprocessing##
# Builds `frame`, a copy of the complete-case data with one +1/-1 indicator per
# predictor (mostly the eq* columns) and their sum `eqsum`, which the Equal Weights
# model regresses enrollment on.
# Rows containing any NA are dropped from vmoo itself, so every later use of vmoo in
# this chunk also sees complete cases only.
vmoo<-na.omit(vmoo)
frame<-vmoo
# NOTE(review): reads a column spelled `more.electriciy`; presumably that is the
# spelling in the CSV header -- confirm.
frame$more.electricity<-frame$more.electriciy
# Numeric items: a response above 3 scores +1, anything else scores -1.
frame$eqmore.electricity<-ifelse(frame$more.electricity>3,1,-1)
frame$eqmore.reduce<-ifelse(frame$more.reduce>3,1,-1)
frame$eqcare.environment<-ifelse(frame$care.environment>3,1,-1)
frame$eqprotect.environment<-ifelse(frame$protect.environment>3,1,-1)
frame$eqsave.money<-ifelse(frame$save.money>3,1,-1)
frame$eqcontrol<-ifelse(frame$control>3,1,-1)
frame$eqreliability<-ifelse(frame$reliability>3,1,-1)
# NOTE(review): `energy.indeendence` -- presumably the CSV header spelling; confirm.
frame$eqenergy.independence<-ifelse(frame$energy.indeendence>3,1,-1)
frame$eqmore.choices<-ifelse(frame$more.choices>3,1,-1)
frame$eqavoid.waste<-ifelse(frame$avoid.waste>3,1,-1)
frame$eqcontrol.consume<-ifelse(frame$control.consume>3,1,-1)
frame$eqbuy.new.tech<-ifelse(frame$buy.new.tech>3,1,-1)
frame$eqeco.purchase<-ifelse(frame$eco.purchase>3,1,-1)
frame$eqdelicate.nature<-ifelse(frame$delicate.nature>3,1,-1)
# Reverse-scored items: a response above 3 scores -1, anything else scores +1.
frame$eqmodify.environment<-ifelse(frame$modify.environment>3,-1,1)
# NOTE(review): unlike every other item, the next two lines overwrite the raw columns
# in `frame` instead of creating eq* copies.  The logistic regressions fitted on
# `frame` later in this chunk include use.plants.animals and therefore see the
# +1/-1 recode rather than the raw response -- confirm this is intended.
frame$rule.nature<-ifelse(frame$rule.nature>3,-1,1)
frame$use.plants.animals<-ifelse(frame$use.plants.animals>3,-1,1)
frame$eqspaceship.earth<-ifelse(frame$spaceship.earth>3,1,-1)
frame$eqgrowth.limits<-ifelse(frame$growth.limits>3,1,-1)
frame$eqsatisfied.utility<-ifelse(frame$satisfied.utility>3,1,-1)
frame$eqtrust.utility<-ifelse(frame$trust.utility>3,1,-1)
# Utility-contact items: "yes" scores -1; for never.contacted, "no" scores +1.
# NOTE(review): these compare against lower-case "yes"/"no" while the items below
# compare against "Yes" -- confirm the capitalisation used in the data.
frame$eqcontact.blackouts<-ifelse(frame$contact.blackouts=="yes",-1,1)
frame$eqcontact.billing<-ifelse(frame$contact.billing=="yes",-1,1)
frame$eqcontact.provision<-ifelse(frame$contact.provision=="yes",-1,1)
frame$eqnever.contacted<-ifelse(frame$never.contacted=="no",1,-1)
# Items answered "Yes" score +1, anything else scores -1.
frame$eqprevious.program<-ifelse(frame$previous.program=="Yes",1,-1)
frame$eqcfl<-ifelse(frame$cfl=="Yes",1,-1)
frame$eqtracking.device<-ifelse(frame$tracking.device=="Yes",1,-1)
frame$eqefficient.appliances<-ifelse(frame$efficient.appliances=="Yes",1,-1)
frame$eqinsulated.home<-ifelse(frame$insulated.home=="Yes",1,-1)
frame$eqflu.shot<-ifelse(frame$flu.shot=="Yes",1,-1)
frame$eqrecycled<-ifelse(frame$recycled=="Yes",1,-1)
frame$eqfour.oh.one.k<-ifelse(frame$four.oh.one.k=="Yes",1,-1)
frame$eqlibrary<-ifelse(frame$library=="Yes",1,-1)
frame$eqprize.drawing<-ifelse(frame$prize.drawing=="Yes",1,-1)
frame$eqdonated.money<-ifelse(frame$donated.money=="Yes",1,-1)
frame$eqdonated.time<-ifelse(frame$donated.time=="Yes",1,-1)
frame$eqlottery.ticket<-ifelse(frame$lottery.ticket=="Yes",1,-1)
frame$eqhave.smart.meter<-ifelse(frame$have.smart.meter=="Yes",1,-1)
frame$eqheard.of.ihd<-ifelse(frame$heard.of.ihd=="Yes",1,-1)
frame$eqihd.save.electricity<-ifelse(frame$ihd.save.electricity>3,1,-1)
frame$eqihd.save.money<-ifelse(frame$ihd.save.money>3,1,-1)
# NOTE(review): eqihd.enjoy is derived from ihd.learn, not ihd.enjoy -- confirm which
# item is meant; as written ihd.enjoy never enters the equal-weights score.
frame$eqihd.enjoy<-ifelse(frame$ihd.learn>3,1,-1)
frame$eqcontinue.survey<-ifelse(frame$continue.survey=="Continue",1,-1)

# Equal-weights score: the plain sum of all the +1/-1 indicators created above
# (including the two recoded in place, rule.nature and use.plants.animals).
frame$eqsum<-frame$eqmore.electricity+frame$eqmore.reduce+frame$eqcare.environment+frame$eqprotect.environment+frame$eqsave.money+frame$eqcontrol+frame$eqreliability+frame$eqenergy.independence+frame$eqmore.choices+frame$eqavoid.waste+frame$eqcontrol.consume+frame$eqbuy.new.tech+frame$eqeco.purchase+frame$eqdelicate.nature+frame$eqmodify.environment+frame$rule.nature+frame$use.plants.animals+frame$eqspaceship.earth+frame$eqgrowth.limits+frame$eqsatisfied.utility+frame$eqtrust.utility+frame$eqcontact.blackouts+frame$eqcontact.billing+frame$eqcontact.provision+frame$eqnever.contacted+frame$eqprevious.program+frame$eqcfl+frame$eqtracking.device+frame$eqefficient.appliances+frame$eqinsulated.home+frame$eqflu.shot+frame$eqrecycled+frame$eqfour.oh.one.k+frame$eqlibrary+frame$eqprize.drawing+frame$eqdonated.money+frame$eqdonated.time+frame$eqlottery.ticket+frame$eqhave.smart.meter+frame$eqheard.of.ihd+frame$eqihd.save.electricity+frame$eqihd.save.money+frame$eqihd.enjoy+frame$eqcontinue.survey

##IHD Psychometrics##
# Reliability and principal-component summary of the four in-home-display items.
# IHD1 columns: 1 = ihd.enjoy, 2 = ihd.save.money, 3 = ihd.save.electricity, 4 = ihd.learn.
vmoo.sum<-vmoo
IHD1<-cbind(vmoo$ihd.enjoy,vmoo$ihd.save.money,vmoo$ihd.save.electricity,vmoo$ihd.learn)
# Pairwise inter-item correlations; r<i><j> is the correlation of columns i and j.
r12<-cor(IHD1[,1],IHD1[,2],use="complete")
r13<-cor(IHD1[,1],IHD1[,3],use="complete")
r14<-cor(IHD1[,1],IHD1[,4],use="complete")
r23<-cor(IHD1[,2],IHD1[,3],use="complete")
r24<-cor(IHD1[,2],IHD1[,4],use="complete")
r34<-cor(IHD1[,3],IHD1[,4],use="complete")
# Positions in corr: 1=r12 2=r13 3=r14 4=r23 5=r24 6=r34.
corr<-c(r12,r13,r14,r23,r24,r34)
# Overall alpha, then alpha with item k removed (drop every correlation involving item k).
alpha.ov.IHD1<-alpha(corr)
alpha.1.IHD1<-alpha(corr[-c(1,2,3)])
alpha.2.IHD1<-alpha(corr[-c(1,4,5)])
alpha.3.IHD1<-alpha(corr[-c(2,4,6)])
alpha.4.IHD1<-alpha(corr[-c(3,5,6)])
# Item matrix with the total score appended as a fifth column.
t.ov.IHD1<-IHD1[,1]+IHD1[,2]+IHD1[,3]+IHD1[,4]
total.IHD1<-cbind(IHD1,t.ov.IHD1)
# Item-rest correlations: each item against the sum of the other three.
it1.IHD1<-cor(IHD1[,1],IHD1[,2]+IHD1[,3]+IHD1[,4],use="complete")
it2.IHD1<-cor(IHD1[,2],IHD1[,1]+IHD1[,3]+IHD1[,4],use="complete")
it3.IHD1<-cor(IHD1[,3],IHD1[,1]+IHD1[,2]+IHD1[,4],use="complete")
it4.IHD1<-cor(IHD1[,4],IHD1[,1]+IHD1[,2]+IHD1[,3],use="complete")
# Principal components on the correlation matrix; vars/cumu hold the proportion and
# cumulative proportion of variance per component.
fit.IHD1<-princomp(IHD1,cor=TRUE)
SF.IHD1<-summary(fit.IHD1)
L.IHD1<-loadings(fit.IHD1)
vars.IHD1<-SF.IHD1$sdev^2/sum(SF.IHD1$sdev^2)
cumu.IHD1<-cumsum(vars.IHD1)
#plot(fit,type="lines", main="Scree Plot")
##NEP Psychometrics##
# Reliability and principal-component summary of the six NEP items.
# NEP1 columns: 1 = use.plants.animals, 2 = modify.environment, 3 = rule.nature
# (these three passed through psych::reverse.code with key -1), 4 = delicate.nature,
# 5 = growth.limits, 6 = spaceship.earth.
# NOTE(review): reverse.code is called without mini/maxi, so it presumably reverses
# around the observed range of each item -- confirm that matches the response scale.
NEP1<-cbind(reverse.code(-1,vmoo$use.plants.animals),reverse.code(-1,vmoo$modify.environment))
NEP1<-cbind(NEP1,reverse.code(-1,vmoo$rule.nature))
NEP1<-cbind(NEP1,vmoo$delicate.nature)
NEP1<-cbind(NEP1,vmoo$growth.limits)
NEP1<-cbind(NEP1,vmoo$spaceship.earth)
# Pairwise inter-item correlations; r<i><j> is the correlation of columns i and j.
r12<-cor(NEP1[,1],NEP1[,2],use="complete")
r13<-cor(NEP1[,1],NEP1[,3],use="complete")
r14<-cor(NEP1[,1],NEP1[,4],use="complete")
r15<-cor(NEP1[,1],NEP1[,5],use="complete")
r16<-cor(NEP1[,1],NEP1[,6],use="complete")
r23<-cor(NEP1[,2],NEP1[,3],use="complete")
r24<-cor(NEP1[,2],NEP1[,4],use="complete")
r25<-cor(NEP1[,2],NEP1[,5],use="complete")
r26<-cor(NEP1[,2],NEP1[,6],use="complete")
r34<-cor(NEP1[,3],NEP1[,4],use="complete")
r35<-cor(NEP1[,3],NEP1[,5],use="complete")
r36<-cor(NEP1[,3],NEP1[,6],use="complete")
r45<-cor(NEP1[,4],NEP1[,5],use="complete")
r46<-cor(NEP1[,4],NEP1[,6],use="complete")
r56<-cor(NEP1[,5],NEP1[,6],use="complete")
# Positions in corr:
#  1=r12  2=r13  3=r14  4=r15  5=r16  6=r23  7=r24  8=r25
#  9=r26 10=r34 11=r35 12=r36 13=r45 14=r46 15=r56
corr<-c(r12,r13,r14,r15,r16,r23,r24,r25,r26,r34,r35,r36,r45,r46,r56)
# Reduced scale = items 1, 3, 5 and 6, the same columns kept for princomp below.
# Fix: the previous vector also contained r25 and r26, which involve the excluded item 2.
corr2<-c(r13,r15,r16,r35,r36,r56)
alpha.ov.NEP1<-alpha(corr)
alpha.ov.NEP1.red<-alpha(corr2)
# Alpha with item k removed: keep only the correlations that do not involve item k.
alpha.1.NEP1<-alpha(corr[c(6:15)])
alpha.2.NEP1<-alpha(corr[c(2,3,4,5,10:15)])
alpha.3.NEP1<-alpha(corr[c(1,3,4,5,7,8,9,13:15)])
# Fix: dropping item 4 must exclude r14, r24, r34, r45, r46 (positions 3,7,10,13,14).
# The previous index set kept r24 (7) and omitted r25 (8) and r56 (15).
alpha.4.NEP1<-alpha(corr[c(1,2,4,5,6,8,9,11,12,15)])
alpha.5.NEP1<-alpha(corr[c(1,2,3,5,6,7,9,10,12,14)])
alpha.6.NEP1<-alpha(corr[c(1,2,3,4,6,7,8,10,11,13)])
# Item matrix with the total score appended as a seventh column.
total.NEP1<-NEP1
t.ov.NEP1<-c(NEP1[,1]+NEP1[,2]+NEP1[,3]+NEP1[,4]+NEP1[,5]+NEP1[,6])
total.NEP1<-cbind(total.NEP1,t.ov.NEP1)
# Item-rest correlations: each item against the sum of the other five.
it1.NEP1<-cor(total.NEP1[,1],c(NEP1[,2]+NEP1[,3]+NEP1[,4]+NEP1[,5]+NEP1[,6]),use="complete")
it2.NEP1<-cor(total.NEP1[,2],c(NEP1[,1]+NEP1[,3]+NEP1[,4]+NEP1[,5]+NEP1[,6]),use="complete")
it3.NEP1<-cor(total.NEP1[,3],c(NEP1[,1]+NEP1[,2]+NEP1[,4]+NEP1[,5]+NEP1[,6]),use="complete")
it4.NEP1<-cor(total.NEP1[,4],c(NEP1[,1]+NEP1[,2]+NEP1[,3]+NEP1[,5]+NEP1[,6]),use="complete")
it5.NEP1<-cor(total.NEP1[,5],c(NEP1[,1]+NEP1[,2]+NEP1[,3]+NEP1[,4]+NEP1[,6]),use="complete")
it6.NEP1<-cor(total.NEP1[,6],c(NEP1[,1]+NEP1[,2]+NEP1[,3]+NEP1[,4]+NEP1[,5]),use="complete")
# Principal components on the reduced scale (items 2 and 4 left out).
fit.NEP1<-princomp(NEP1[,-c(2,4)],cor=TRUE)
SF.NEP1<-summary(fit.NEP1)
L.NEP1<-loadings(fit.NEP1)
vars.NEP1<-SF.NEP1$sdev^2
vars.NEP1<-vars.NEP1/sum(vars.NEP1)
cumu.NEP1<-cumsum(vars.NEP1)
#plot(fit,type="lines",main="Scree Plot")
##
###MLE Logistic###
# Logistic regression of enrollment on the raw predictors in `frame`.
#   glmcv  : 10-fold cross-validation from boot::cv.glm.
#   gm     : in-sample confusion table (rows = observed, columns = predicted at a 0.5 cut-off).
#   gmm    : square root of the product of the four marginal totals of gm
#            (presumably the denominator of a phi-type coefficient computed elsewhere).
#   glmiss : for each of N bootstrap refits, the number of rows of `frame` misclassified.
glm1<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+ihd.enjoy+ihd.save.money+ihd.save.electricity+hours.home+use.plants.animals+care.environment+delicate.nature+growth.limits+spaceship.earth,data=frame,family=binomial(link="logit"))
glmcv<-cv.glm(frame,glm1,K=10)
# Fixed factor levels keep the table 2x2 even if the model predicts a single class;
# otherwise gm[3] and gm[4] would be NA.
gm<-table(factor(ifelse(frame$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(invlogit(predict(glm1))>0.5,1,0),levels=c(0,1)))
# Table counts are integers; convert to double so the four-way product below cannot
# overflow to NA on larger samples.
gmm1<-as.numeric(gm[1]+gm[2])
gmm2<-as.numeric(gm[3]+gm[4])
gmm3<-as.numeric(gm[1]+gm[3])
gmm4<-as.numeric(gm[2]+gm[4])
gmm<-sqrt(gmm1*gmm2*gmm3*gmm4)
qr<-frame
glmiss<-c()
for(i in 1:N){
# Fit on a bootstrap resample, score against the full data.
q1<-qr[sample(nrow(qr),replace=T),]
glm1<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+ihd.enjoy+ihd.save.money+ihd.save.electricity+hours.home+use.plants.animals+care.environment+delicate.nature+growth.limits+spaceship.earth,data=q1,family=binomial(link="logit"))
c<-ifelse(frame$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm1,frame))>0.5,1,0) 
glmiss[i]<-sum(abs(c-d))}
# The loop reuses the name glm1, so refit on the full data for later use.
glm1<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+ihd.enjoy+ihd.save.money+ihd.save.electricity+hours.home+use.plants.animals+care.environment+delicate.nature+growth.limits+spaceship.earth,data=frame,family=binomial(link="logit"))
##MLE Logistic with Factors##
# Same model, but the IHD and NEP items are replaced by principal-component scores:
# the first IHD component and the first two NEP components.
frame.fac<-cbind(vmoo,SF.IHD1$scores[,1])
frame.fac<-cbind(frame.fac,SF.NEP1$scores[,c(1:2)])
# Name the three appended columns by position relative to vmoo rather than the
# hard-coded 67:69, which was only right when vmoo had exactly 66 columns.
colnames(frame.fac)[ncol(vmoo)+1:3]<-c("IHD1.fac","NEP1.fac","NEP2.fac")
glm1.fac<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+hours.home+care.environment+IHD1.fac+NEP1.fac+NEP2.fac,data=frame.fac,family=binomial(link="logit"))
glmcv.fac<-cv.glm(frame.fac,glm1.fac,K=10)
# In-sample confusion table (rows = observed, columns = predicted at 0.5).
# Fixed levels keep it 2x2 even if only one class is predicted.
gm.fac<-table(factor(ifelse(frame.fac$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(invlogit(predict(glm1.fac))>0.5,1,0),levels=c(0,1)))
# Marginal totals as doubles so their product cannot overflow integer range.
gmm1<-as.numeric(gm.fac[1]+gm.fac[2])
gmm2<-as.numeric(gm.fac[3]+gm.fac[4])
gmm3<-as.numeric(gm.fac[1]+gm.fac[3])
gmm4<-as.numeric(gm.fac[2]+gm.fac[4])
gmm.fac<-sqrt(gmm1*gmm2*gmm3*gmm4)
qr<-frame.fac
glmiss.fac<-c()
for(i in 1:N){
# Fit on a bootstrap resample, count misclassified rows of the full data.
q1<-qr[sample(nrow(qr),replace=T),]
glm1.fac<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+hours.home+care.environment+IHD1.fac+NEP1.fac+NEP2.fac,data=q1,family=binomial(link="logit"))
c<-ifelse(frame.fac$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm1.fac,frame.fac))>0.5,1,0) 
glmiss.fac[i]<-sum(abs(c-d))}
# The loop reuses the name glm1.fac; refit on the full data (as is already done for
# glm1 and rp1) so later code does not pick up the last bootstrap fit.
glm1.fac<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+hours.home+care.environment+IHD1.fac+NEP1.fac+NEP2.fac,data=frame.fac,family=binomial(link="logit"))
####
##Take The Best##
# Single-predictor benchmark: logistic regression of enrollment on ihd.enjoy alone.
#   glmcv2  : 10-fold cross-validation from boot::cv.glm.
#   gm2     : in-sample confusion table (rows = observed, columns = predicted at 0.5).
#   gmm2    : square root of the product of the four marginal totals of gm2.
#   ttbmiss : misclassified rows of `frame` for each of N bootstrap refits.
glm2<-glm(enroll~ihd.enjoy,data=frame,family=binomial(link="logit"))
glmcv2<-cv.glm(frame,glm2,K=10)
# Fixed levels keep the table 2x2 even if only one class is predicted.
gm2<-table(factor(ifelse(frame$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(invlogit(predict(glm2))>0.5,1,0),levels=c(0,1)))
# Marginal totals as doubles so their product cannot overflow integer range.
gmm21<-as.numeric(gm2[1]+gm2[2])
gmm22<-as.numeric(gm2[3]+gm2[4])
gmm23<-as.numeric(gm2[1]+gm2[3])
gmm24<-as.numeric(gm2[2]+gm2[4])
gmm2<-sqrt(gmm21*gmm22*gmm23*gmm24)
q<-frame
ttbmiss<-c()
for(i in 1:N){
q1<-q[sample(nrow(q),replace=T),]
glm2<-glm(enroll~ihd.enjoy,data=q1,family=binomial(link="logit"))
ttbmiss[i]<-sum(abs(ifelse(frame$enroll=="Yes",1,0)-ifelse(invlogit(predict(glm2,frame))>0.5,1,0)))
}
# The loop reuses the name glm2; refit on the full data (as is already done for
# glm1 and rp1) so later code does not pick up the last bootstrap fit.
glm2<-glm(enroll~ihd.enjoy,data=frame,family=binomial(link="logit"))
####
##Equal Weights##
# Equal-weights benchmark: logistic regression of enrollment on eqsum, the sum of
# the +1/-1 indicators built during preprocessing.
#   glmcv3 : 10-fold cross-validation from boot::cv.glm.
#   eqm    : in-sample confusion table (rows = observed, columns = predicted at 0.5).
#   miss1  : in-sample misclassifications (the two off-diagonal cells of eqm).
#   miss   : misclassified rows of `frame` for each of N bootstrap refits.
#   eqq    : square root of the product of the four marginal totals of eqm.
glmeq<-glm(enroll~eqsum,data=frame,family=binomial(link="logit"))
glmcv3<-cv.glm(frame,glmeq,K=10)
# Fixed levels keep the table 2x2 even if only one class is predicted.
eqm<-table(factor(ifelse(frame$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(invlogit(predict(glmeq))>0.5,1,0),levels=c(0,1)))
miss1<-eqm[2]+eqm[3]
q<-frame
miss<-c()
for(i in 1:N){
q1<-q[sample(nrow(q),replace=T),]
glmeq<-glm(enroll~eqsum,data=q1,family=binomial(link="logit"))
miss[i]<-sum(abs(ifelse(frame$enroll=="Yes",1,0)-ifelse(invlogit(predict(glmeq,frame))>0.5,1,0)))
}
# The loop reuses the name glmeq; refit on the full data (as is already done for
# glm1 and rp1) so later code does not pick up the last bootstrap fit.
glmeq<-glm(enroll~eqsum,data=frame,family=binomial(link="logit"))
# Marginal totals as doubles so their product cannot overflow integer range.
eqm1<-as.numeric(eqm[1]+eqm[2])
eqm2<-as.numeric(eqm[3]+eqm[4])
eqm3<-as.numeric(eqm[1]+eqm[3])
eqm4<-as.numeric(eqm[2]+eqm[4])
eqq<-sqrt(eqm1*eqm2*eqm3*eqm4)
##
##Recursive Partitioning Classification Tree##
# rpart tree predicting enroll from every column of vmoo except the first.
#   rp1.noihd      : the same tree with columns 57:60 also excluded.
#   rp1t / rp1tt   : in-sample confusion table and sqrt of the product of its marginals.
#   rpmiss         : misclassified rows of vmoo for each of N bootstrap refits.
#   fold.cart.miss : misclassification rate in each held-out fold of a 10-fold CV.
# predict(...)[,2] is taken as the probability of enrolling.
# NOTE(review): that assumes "Yes" is the second level of enroll -- confirm.
rp1.noihd<-rpart(enroll~.,data=vmoo[,-c(1,57:60)])
rp1<-rpart(enroll~.,data=vmoo[,-1])
# Fixed levels keep the table 2x2 even if only one class is predicted.
rp1t<-table(factor(ifelse(vmoo$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(predict(rp1,vmoo)[,2]>0.5,1,0),levels=c(0,1)))
# Marginal totals as doubles so their product cannot overflow integer range.
rp1t1<-as.numeric(rp1t[1]+rp1t[2])
rp1t2<-as.numeric(rp1t[3]+rp1t[4])
rp1t3<-as.numeric(rp1t[1]+rp1t[3])
rp1t4<-as.numeric(rp1t[2]+rp1t[4])
rp1tt<-sqrt(rp1t1*rp1t2*rp1t3*rp1t4)
qz<-vmoo
rpmiss<-c()
for(i in 1:N){
q1<-qz[sample(nrow(qz),replace=T),]
# Fix: fit on the bootstrap resample q1.  The tree used to be fitted on qz (the full
# data), so every replicate was the same in-sample fit.
rp1<-rpart(enroll~.,data=q1[,-1])
rpmiss[i]<-sum(abs(ifelse(vmoo$enroll=="Yes",1,0)-ifelse(predict(rp1,vmoo)[,2]>0.5,1,0)))}
fold.cart.miss<-c()
q1<-vmoo
size<-length(q1[,1])
# Assign each row independently to one of 10 folds.
fold.d<-sample(c(1:10),size,replace=TRUE)
q1<-cbind(q1,fold.d)
for(j in 1:max(fold.d)){
# Fix: train on the rows outside fold j.  The old index -c(q1$fold.d==j) negated a
# logical vector (giving -1/0), which removed only row 1 and left the held-out fold
# in the training data.  The fold.d column (last column of q1) is dropped as well so
# the fold label cannot be used as a predictor.
rp1<-rpart(enroll~.,data=q1[q1$fold.d!=j,-c(1,ncol(q1))])
a<-ifelse(q1$enroll[q1$fold.d==j]=="Yes",1,0)
b<-ifelse(predict(rp1,q1[fold.d==j,])[,2]>0.5,1,0)
fold.cart.miss[j]<-sum(abs(a-b))/length(q1$fold.d[fold.d==j])}
# The loops reuse the name rp1, so refit on the full data for later use.
rp1<-rpart(enroll~.,data=vmoo[,-1])
##
##Recursive Partitioning Classification Tree (no IHD)##
# Same analysis as the full tree, with columns 57:60 of vmoo also removed from the
# predictors (presumably the four IHD items -- confirm against the CSV layout).
#   rp1.1t / rp1.1tt : in-sample confusion table and sqrt of the product of its marginals.
#   rpmiss1          : misclassified rows of vmoo for each of N bootstrap refits.
#   fold.cart.miss1  : misclassification rate in each held-out fold of a 10-fold CV.
rp1.1<-rpart(enroll~.,data=vmoo[,-c(1,57:60)])
# Fixed levels keep the table 2x2 even if only one class is predicted.
rp1.1t<-table(factor(ifelse(vmoo$enroll=="Yes",1,0),levels=c(0,1)),factor(ifelse(predict(rp1.1,vmoo)[,2]>0.5,1,0),levels=c(0,1)))
# Marginal totals as doubles so their product cannot overflow integer range.
rp1.1t1<-as.numeric(rp1.1t[1]+rp1.1t[2])
rp1.1t2<-as.numeric(rp1.1t[3]+rp1.1t[4])
rp1.1t3<-as.numeric(rp1.1t[1]+rp1.1t[3])
rp1.1t4<-as.numeric(rp1.1t[2]+rp1.1t[4])
rp1.1tt<-sqrt(rp1.1t1*rp1.1t2*rp1.1t3*rp1.1t4)
qz<-vmoo
rpmiss1<-c()
for(i in 1:N){
q1<-qz[sample(nrow(qz),replace=T),]
# Fix: fit on the bootstrap resample q1.  The tree used to be fitted on qz (the full
# data), so every replicate was the same in-sample fit.
rp1.1<-rpart(enroll~.,data=q1[,-c(1,57:60)])
rpmiss1[i]<-sum(abs(ifelse(vmoo$enroll=="Yes",1,0)-ifelse(predict(rp1.1,vmoo)[,2]>0.5,1,0)))}
fold.cart.miss1<-c()
q1<-vmoo
size<-length(q1[,1])
# Assign each row independently to one of 10 folds.
fold.d<-sample(c(1:10),size,replace=TRUE)
q1<-cbind(q1,fold.d)
for(j in 1:max(fold.d)){
# Fix: train on the rows outside fold j.  The old index -c(q1$fold.d==j) negated a
# logical vector (giving -1/0), which removed only row 1 and left the held-out fold
# in the training data.  The fold.d column (last column of q1) is dropped as well so
# the fold label cannot be used as a predictor.
rp1.1<-rpart(enroll~.,data=q1[q1$fold.d!=j,-c(1,57:60,ncol(q1))])
a<-ifelse(q1$enroll[q1$fold.d==j]=="Yes",1,0)
b<-ifelse(predict(rp1.1,q1[fold.d==j,])[,2]>0.5,1,0)
fold.cart.miss1[j]<-sum(abs(a-b))/length(q1$fold.d[fold.d==j])}
# The loops reuse the name rp1.1, so refit on the full data for later use.
rp1.1<-rpart(enroll~.,data=vmoo[,-c(1,57:60)])
##
@ 
\section{Introduction}
A valid behavioral energy efficiency trial must address the challenge of recruitment.  For any trial it is challenging to get customers to sign up, and if they do not, it is even more difficult to determine why.  Volunteer bias can occur when volunteers and non-volunteers respond differently to an energy efficiency program, particularly if researchers are unaware of this difference or unable to compensate for it.  Using an all-volunteer sample is likely to overestimate program benefits, as early adopters can be more responsive than the general population purely because of their enthusiasm.  In an extreme example, if people choose to participate in a trial based only on accurate knowledge about whether they will benefit, the study sample will be comprised wholly of participants that will benefit, and the remaining non-participant population will consist wholly of those that would not benefit \cite{chassang2010selective}.  A failure to understand this would lead researchers to incorrectly conclude that the remaining general population would greatly benefit from the intervention when they would actually not benefit at all.  Even in less extreme cases, any volunteer bias threatens the ability to generalize from the sample to the population of interest.

The quality of recruitment determines what inferences from the study's sample to the population of interest are valid, meaning recruitment must be treated as a critical ``part of the science'' \cite{hewison2006overcoming}, rather than being dismissed as an administrative nuisance.  In this paper we discuss the evidence on the causes of volunteer bias in behavioral research and a method of accommodating bias when, despite best efforts, it does occur \cite{fischhoff1999construal}.

Our approach has two parts.  First, we develop a simple questionnaire that can be used to predict who will volunteer in an energy efficiency trial of in-home displays.  We derive questionnaire items from previous research on volunteering \cite{rosenthal1975volunteer} and evaluate their psychometric properties \cite{devellis2011scale,loehlin2004latent,demars2010item,spector1992summated}.  Second, we develop statistical models based on this questionnaire that accurately predict intentions to volunteer for an in-home display trial.  To test the quality of these statistical models, we compare modern machine learning algorithms against simpler, often better performing alternatives \cite{armstrong1985crystal,dana2004superiority,gigerenzer1999betting}.  To avoid overfitting the data \cite{leamer1983let,fischhoff1982those}, we use cross-validated and bootstrapped error metrics to estimate generalization error rather than using in-sample error estimates \cite{efron1993introduction}.

%This sensitivity comes from knowledge of the psychology of prediction and explanation, as people believe that predictions are more certain than they really are \cite{soyer2012illusion}, and it is possible to fit the data well by fiddling with the predictors and sample to get a desired result \cite{leamer1983let,fischhoff1982those}.  To do this, we benchmark more complex machine learning algorithms against simpler models that usually outperform complex models \cite{armstrong1985crystal,dana2004superiority,gigerenzer1999betting}, using cross-validated and bootstrapped errors metrics for estimates of generalization error \cite{efron1993introduction}, and also compare the ability of the different models to predict real, independently sampled, test data.

\section{Prevalence of Volunteer Bias}
Volunteer bias in behavioral energy trials has predominantly occurred in two ways.  The ``usual'' volunteer bias, where people self-select into the study and are then randomly assigned to condition, makes inferences from sample to population uncertain.  A second type of volunteer bias, where studies recruit the control and treatment groups differently, not only makes inferences from sample to population uncertain, but also between groups within the sample.  This means that any comparison between treatment and control groups cannot separate the effectiveness of the treatment (the in-home display) from differences between how samples were obtained.

As an example of the first type of volunteer bias, the BC Hydro PowerCost Monitor Time-of-Use pilot used single family dwellings in British Columbia with an opt-in design \cite{sulyma2008experimental}.  Those recruited were more educated, had higher annual household income, were more knowledgeable about electricity conservation, more active in trying to save energy, more willing to change habits, and used on average 1700 kWh less than other comparable homes in the BC area.  This study fits into the ``usual'' volunteer bias category because random assignment occurred \emph{after} participants opted in, thus allowing valid comparisons between groups in the sample but not extrapolation from sample to population.

Almost every trial of in-home displays succumbs to the second, more severe, form of volunteer bias.\footnote{This does not merely apply to in-home display studies, but we focus on them here to keep the discussion shorter.  Davis \emph{et al.} provide additional references \cite{davis2012setting} to studies on dynamic pricing and home automation.}  For example, the Milton Hydro Direct Energy Smart Home Energy Conservation Kit study recruited participants using telephone, direct mail, and billing inserts \cite{schembri2008influence}.  Those eligible to receive an in-home display had to be at least 18 years old, must have lived in the home for at least one year, did not plan to move, and expressed a willingness to complete two surveys during the study.  Eligible customers who expressed interest and registered online were then contacted for an installation appointment based on the order they registered, resulting in 108 homes having an in-home display installed for free.

The control group was recruited differently, consisting of 23 volunteers from a pool of 300 recruited customers who had homes that were judged to be of similar size and age to those in the treatment group, who completed a survey for a \$100 gift certificate, and who lived in geographic clusters near the treatment group homes.  This pattern of recruiting those in the treatment and control groups differently holds for almost every other trial of in-home displays, including the Oberlin TED5000 study \cite{allen2006effects}, the Ontario Energy Board Hydro One pilot \cite{frank2008hydro}, the Energy Trust of Oregon PowerCost Monitor study \cite{sipe2009net}, the Baltimore Gas and Electric Smart Energy Pricing Pilot with the Energy Orb \cite{faruqui2009bge}, the Omaha Public Power study \cite{eiden2009investigation}, and the Florida Power and Light Energy Detective study \cite{parker2008pilot}.

Not all pilot studies, however, have been affected by volunteer bias.  The first exception, the Polk's Landing study \cite{mcclelland1979energy}, had displays installed in homes before people bought them, with no way for buyers to know which homes had the displays beforehand.  The Southern California Edison study \cite{sexton1987consumer} used an opt-out design with an opaque opt-out procedure, resulting in no opt-outs.  The Commonwealth Edison Energy Smart Pricing Pilot with Pricelight study \cite{sbc2006evaluation,sbc2007evaluation} and PG\&E's Smart-Rate Pilot \cite{george2010load} both explicitly modeled selection using propensity scores, an approach that is discussed in Section~\ref{sec:adjusting}.

%Intuitive vs. Machine (optimal) vs. Random; Crowdsourced paramorphic bootstrap models \& TTB \& Dawes' Eq. Weights; Recursive partitioning \& MLE Logistic.  Have people/experts rank what are going to be the best predictors or the best weights vs. use model selection with machine learning vs. random choice benchmark. Have them make probability estimates on how likely they think their judgment is correct for each prediction.  Random weights vs. random predictors vs. optimal weights vs. intuitive weights. vs. intuitive predictors  This document will describe our research on predicting volunteerism. [Tamar insert intro here]

\section{Adjusting for Volunteer Bias}
\label{sec:adjusting}
Even if one follows current best practices for recruitment (e.g., \cite{treweek2010strategies,dillman2007mail,edwards2009methods}) some proportion of those who are offered the program will not participate.  Fortunately, if one can create a model of the process of volunteering, and predict who volunteers and who does not, then the risk of incorrect generalization from sample to population can be minimized.

One simple approach is to use \emph{propensity score adjustment} \cite{wooldridge2009introductory,gelman2007data}, which explicitly models each participant's probability of volunteering.  This was done in both PG\&E's Smart-Rate Pilot \cite{george2010load} and the Commonwealth Edison Energy Smart Pricing Pilot \cite{sbc2006evaluation,sbc2007evaluation}. The Commonwealth Edison propensity score model, for example, included whether customers purchased new major appliances, used a fan to reduce costs, lived in a single-family detached home, were above 65 years old, and the number and type of people living in the household.  They found that those who used fans to reduce costs, as well as those who had more people in the household, were significantly less likely to enroll (\emph{t}-values of 2 and 3 respectively in the model), whereas those who lived in a single-family detached home were more likely to participate ($t=2.4$).  This model correctly predicted the enrollment decision of 71\% of those included in the analysis, but did not fare well on other measures of ability to discriminate between volunteers and non-volunteers (e.g., a pseudo $R^2$ of .20).

The statistical approach to addressing volunteer bias using propensity score adjustment must address three unavoidable and seemingly intractable hurdles: (1) finding the right predictors of volunteering, (2) correctly combining the right predictors to accurately predict volunteering, and (3) extracting the necessary data by getting study non-volunteers to agree to complete a survey.  In the next three sections we outline our approach to dealing with these three problems.

\subsection{Predictor Variables}
The first challenge of building a propensity score model of volunteering is knowing what predictors to include.  While the problem of volunteer bias has been acknowledged across the social sciences, and research on volunteering is more than 80 years old \cite{rosenzweig1933experimental,edgerton1947objective,wallin1949volunteer}, many studies discussing why people participate (both in research studies and civic engagement) have been observational, retrospective, and failed to include a range of variables broad enough to adequately predict volunteering to the degree necessary to adjust for bias.

Perhaps the best resource that exists on the topic is Rosenthal and Rosnow's classic review \cite{rosenthal1975volunteer}, which found that volunteering for psychological research experiments was associated with demographic factors such as education, socioeconomic status (with more highly educated and higher SES groups being more likely to volunteer), and sex (women volunteer more often than men).  Volunteers were also more motivated to seek approval and more sociable than non-volunteers, indicating psychological factors also play a role.  Other evidence suggests that those who are proficient volunteers are more likely to volunteer \cite{callahan2007volunteer}.  In the case of an in-home display trial, for example, those who have previously volunteered for energy initiatives or engaged in energy conservation behavior may also volunteer to be part of an in-home display trial.

This research is limited, however, as it largely relies on understanding why university students decide to volunteer for psychology experiments, a decision that is likely insensitive to contextual factors that matter in real world recruitment.  Those factors that are linked to the specific offering, rather than fixed attributes of the volunteer (e.g., demographics), may be as good or better predictors of volunteering, even in psychological experiments.

One example of such a contextual factor is the appeal and relevance of the specific initiative being presented.  In one case, inpatients in an alcoholism rehabilitation program who volunteered for a trial on sexual behavior showed greater concerns about sexual functioning and higher incidences of sexual problems than non-volunteers in the same program \cite{nirenberg1991volunteer}.  Similarly, college students who volunteer for research on sexual behavior tend to differ from non-volunteers on sexual characteristics, such as sexual experience and confidence, but do not differ on personality or demographic variables \cite{strassberg1995volunteer,wiederman1999volunteer,wolchik1983volunteer}.

%In the context of in-home displays, volunteering for a research study with no incentive may evoke a pro-social motivation to help the researcher and advance scientific knowledge, and thus would be partly determined by individual differences in altruistic personality or behavior \cite{van2011volunteers}.  On the other hand, volunteering for an energy initiative that offers a financial incentive may undermine this altruistic motivation, as people will generally act to maximize their perceived net benefits within the limits of their cognitive and emotional make-up \cite{fishbein1979theory,ajzen1991theory}. 

%In-home displays may also appeal to different people for different reasons, meaning more complex multivariate approaches must be used to discover how clusters of factors (rather than single independent ones) are associated with enrollment decisions.  Existing work on such clustering or segmentation analyses in the context of the smart grid report that a range of behavioral motivations can matter, from environmental concern to energy independence,\footnote{\url{http://smartgridcc.org/research/sgcc-research}} but multivariate clustering approaches have not yet been applied to decisions to volunteer for energy efficiency trials.  

\subsection{Prediction Method}
Second, once a set of predictors is chosen, the most predictive combination of them must be discovered among a large number of possibilities.  Traditional approaches to propensity score models use Logistic Regression, as was done in the Commonwealth Edison Energy Smart Pricing Pilot \cite{sbc2006evaluation,sbc2007evaluation} and PG\&E's Smart-Rate Pilot \cite{george2010load}.  This approach is severely limited, as the predictors must combine linearly, cannot interact with each other, and must be selected ahead of time.  As a result, Logistic Regression misses non-linear and configural relationships in the data, and will often include too many variables, leading to overestimation of the model's predictive validity.  Modern machine learning approaches overcome these issues by allowing the set of predictor variables in the model to combine in non-linear and configural ways as well as using automatic methods of selecting variables that improve prediction while also removing variables that are likely to overfit the data.

To find a prediction method that does this, we began by looking at the most successful multivariate machine learning approaches \cite{wu2008top}.  However, on the data we collected most of these approaches performed poorly or at least worse than simpler methods.  Instead of summarizing the performance of all the approaches, we limit our summary to what is most commonly used for multivariate classification (Logistic Regression) \cite{bishop2006pattern} and what we found to be the best machine learning approach, the Classification Tree \cite{breiman1984classification}, a method that automatically performs variable selection without overfitting and can detect configural (interactive) relationships between predictors, but is unable to discover non-linear decision boundaries without human assistance.

%\footnote{The Logistic Regression and Classification Tree approaches are discussed in greater detail in~\ref{app:glm} and~\ref{app:class}, respectively.}
%including \emph{Support Vector Machines} \cite{vapnik1998statistical,vapnik2000nature,karatzoglou2005support,chang2011libsvm,bishop2006pattern}, \emph{Adaboost} \cite{friedman2001greedy,friedman2000additive,friedman2002stochastic,culp2006ada}, \emph{K-Nearest Neighbors} \cite{ripley2008pattern,venables2002modern,bishop2006pattern}, \emph{Classification Trees} \cite{rpartman,breiman1984classification}, \emph{Bayesian and Regularized Regression} \cite{bishop2006pattern}, \emph{Logistic Regression} using Maximum Likelihood Estimation \cite{bishop2006pattern}, \emph{Kernel Regression} \cite{hayfield2008nonparametric}, and \emph{Naive Bayes} \cite{scutari2010bnlearn,borgelt2009graphical,koller2009probabilistic}.  

Advances in statistical techniques are exciting, making it tempting to apply the newest tool to solve prediction problems.  However, this excitement should be tempered by the finding that simple (improper) statistical rules outperform human prediction \cite{swets2000better,dawes1989clinical} as well as more complex models that estimate optimal weights, such as multiple linear regression \cite{dawes1979robust,dawes1974linear,wilks1938weighting}.  To evaluate the validity of the Logistic Regression and Classification Trees, we benchmark them against two simpler alternatives: the Take the Best (TTB) heuristic \cite{gigerenzer1999betting}, which uses only the best univariate predictor, and Equal Weights, which sums the positive and negative predictors, weighting them equally \cite{dawes1979robust,dawes1974linear}.

%Instead of estimating prediction weights for each variable using statistical rules, the equal weights approach merely decides whether each variable should be positively or negatively related to what is being predicted (the criterion).  If the predictor is expected to be positively related to the criterion, then the person is given a score of $+1$ if their value on the predictor is above the middle point of the scale, or $-1$ otherwise \cite{dana2004superiority}.  Variables that are judged to be negatively related to the criterion are reverse coded.\footnote{Our a priori chosen weights are presented with the materials section in~\ref{app:measone}.}  The total score for each person is then the sum of all the predictors based on their a priori chosen weights, and is used to predict the criterion, in our case intention to enroll \cite{dawes1979robust,dawes1974linear}.  Second, when there are many possible explanatory variables, and one is uncertain about their ablity to predict, then making predictions from only the best predictor works well \cite{gigerenzer1999betting}.  This has been called the Take the Best Heuristic (TTB), and will perform better than other approaches \cite{dana2004superiority} when the predictive ability of the best predictor is roughly equal to the total multiple correlation.

%This is particularly true in social science contexts involving human behavior, where samples are small and prediction error is high \cite{dana2004superiority}.  For example, when it is relatively difficult to predict the variable of interest, in our case intention to enroll, to outperform simpler methods one may need a sample size that is 50 times larger than the number of predictors in the multiple regression model.  Without a sample size of 30 times the number of predictors, multiple regression is unlikely to outperform simpler methods, regardless of the overall multiple correlation.  In sum \cite{dana2004superiority}:

%\begin{quote}
%  ``Regression coefficients should not be used for predictions unless error is likely to be extremely small by social science standards or sample sizes will be larger than 100 observations to predictors.  In other words, regression coefficients should almost never be used for social science predictions.''
%\end{quote}

\subsection{Recruiting Non-Volunteers}

The third, and most important, reason why it is difficult to develop a good model of volunteering is that it is hard to collect data on non-volunteers.  Developing and validating a predictive model requires non-volunteers to respond to other measures, such as a survey or phone interview.  It is likely that those who don't want to participate in the efficiency program also don't want to subsequently respond to a survey or interview.  Thus, any model of volunteering must rely either on data that is indirectly collected (e.g., public records), or from short, easily completed questionnaires that are highly likely to be responded to.

\section{Study}
We present a study that develops a simple questionnaire and statistical model to address volunteer bias.\footnote{A similar study was conducted before this one, with similar, although not identical, results.  The data and statistical analyses for this study can be obtained from here \url{http://hdl.handle.net/1902.1/19154}, labeled ``Study One.''}  The study presents participants with a recruitment offering that is based on best practices described elsewhere \cite{treweek2010strategies} along with a questionnaire that uses measures to predict intentions to volunteer for an in-home display trial.  We focus on developing the shortest questionnaire possible so that non-volunteers would be likely to complete it with minimal incentive as well as evaluating statistical models of volunteering, comparing complex optimized approaches (Logistic Regression and Classification Trees) to simpler ones (the best predictor and equal weights).  Participants were first asked if they would be willing to volunteer for the program and were then asked a series of predictor questions.\footnote{All materials can be found in~\ref{app:meastwo}.} 

\subsection{Methods}
\subsubsection{Participants}
The participants were 279 U.S. residential bill-payers recruited on Amazon's MTurk.  Their average age was \Sexpr{prettyNum(mean(vmoo2$age,na.rm=TRUE))} years (range \Sexpr{prettyNum(min(vmoo2$age))} -- \Sexpr{prettyNum(max(vmoo2$age))}), 92 (42\%) were women, and their self-reported average monthly bill was \$\Sexpr{prettyNum(mean(vmoo2$average.bill,na.rm=TRUE))} (range \$\Sexpr{prettyNum(min(vmoo2$average.bill,na.rm=TRUE))}--\$\Sexpr{prettyNum(max(vmoo2$average.bill,na.rm=TRUE))}). 

\subsubsection{Materials}
Standard demographics were used, namely age, gender, employment status, education, annual household income, race, and political affiliation.

The first category of questions was about \emph{constraints} or barriers to enrollment in the trial.  First, potential participants must trust the study sponsor, as expecting deception, manipulation, or harm would undermine the perceived worth of the study.  People who trust the study sponsors must then feel competent enough to succeed in the trial and able to control their electricity consumption.  If one feels unable to use the in-home display or unable to control electricity consumption, then participation in the trial would be pointless.  This possibility was examined using a small set of items about self-efficacy.  Lastly, participants must be in the home to use the in-home display, so they were asked for the total hours they spent in the home during the day and whether they were in the home for each of six 4-hour time periods.

In addition to constraints, people may volunteer based on \emph{motivations} or \emph{topical interest}.  People may be motivated to volunteer if they are very embedded socially, having close friends that they frequently talk to.  A second motivation is a concern for saving money, as was captured by the frugality scale.  A third scale had to do with the degree to which people enjoy exploring new things, the exploration scale, which may capture expected enjoyment in the study independent of financial benefit.

%Other motivations to participate, for example the desire to have more reliable service, were adapted from a previous study \cite{krishnamurti2011preparing}.

In terms of topics, we expected the program to be construed in one of three ways: 1) a study about environmental behaviors, 2) a study about in-home displays, or 3) a study about eco-friendly technologies more generally.  Interest in the first topic was measured using the New Ecological Paradigm (NEP) \cite{dunlap2000new}, a widely used measure of environmental attitudes.  Interest in the second topic was measured using a four-item scale of their attitudes and expectations of the in-home display, such as whether they expected it to help them save money.  General interest in environmentally friendly technologies was measured using a separate ``eco-technology'' scale.

%Lastly, two questions assessed other volunteering behaviors, asking whether participants had previously volunteered in an energy efficiency program, and whether they would be willing to complete additional questions after the survey.  Several similar questions asked whether participants engaged in civic behaviors, for example using a public library.

\subsection{Results}
\subsubsection{Psychometric Analyses and Univariate Prediction}

The psychometric analyses \cite{devellis2011scale,loehlin2004latent,demars2010item} use Cronbach's alpha \cite{cronbach1951coefficient} and item-total correlations \cite{spector1992summated} as measures of internal consistency (reliability), and Principal Components Analysis \cite{frs1901liii} as a measure of dimensional structure.  To create a short questionnaire that non-volunteers would be inclined to complete, we limit each construct to four questions and use a rule of thumb that Cronbach's alpha must be greater than .7 \cite{nunnally1967psychometric} for an item to be included in a factor model; elements with the lowest item-total correlations, or with item-total correlations lower than .4, were dropped \cite{spector1992summated}.

<<test error,echo=false,results=hide,fig=false>>=
## Equal-weights pre-processing for the second study's data frame (vmoo2).
## Each questionnaire item is recoded into a unit weight: +1 when the response
## is above a cut-point (or equals "Yes"), -1 otherwise.  Items whose recode is
## written ifelse(...,-1,1) are reverse-scored.  The recoded columns are summed
## into eqsum/eqsum2 further down in this chunk.
## The commented-out lines below are an earlier factor-score variant kept for reference.
#vmoo2<-read.csv("vmoo2.csv",na.strings="")
#vmoo2.fac<-na.omit(vmoo2)
#vmoo2.fac<-cbind(vmoo2.fac,-SF.IHD2$scores[,1])
#vmoo2.fac<-cbind(vmoo2.fac,-SF.NEP2$scores[,1])
#vmoo2.fac<-cbind(vmoo2.fac,-SF.trust2$scores[,1])
#vmoo2.fac<-cbind(vmoo2.fac,-SF.buy$scores[,1])
#vmoo2.fac<-cbind(vmoo2.fac,-SF.eff$scores[,1])
#vmoo2.fac<-cbind(vmoo2.fac,-SF.ex$scores[,1])
#colnames(vmoo2.fac)[94:99]<-c("IHD.Fac","NEP.Fac","Trust.Fac","Buy.Fac","Eff.Fac","Cur.Fac")
##Equal Weights Pre-processing##
## NOTE: this overwrites the global vmoo2 with its complete cases only; every
## later use of vmoo2 in the document sees the reduced data frame.
vmoo2<-na.omit(vmoo2)
## frame2 is a working copy that receives the recoded (eq*) columns.
frame2<-vmoo2
## Items cut at >3 (presumably the midpoint of a 5-point scale -- TODO confirm).
frame2$eqmore.reduce<-ifelse(frame2$more.reduce>3,1,-1)
frame2$eqcare.environment<-ifelse(frame2$care.environment>3,1,-1)
frame2$eqconsistent.recycle<-ifelse(frame2$consistent.recycle>3,1,-1)
frame2$eqactive.community<-ifelse(frame2$active.community>3,1,-1)
frame2$eqprotect.environment<-ifelse(frame2$protect.environment>3,1,-1)
frame2$eqcontrol<-ifelse(frame2$control>3,1,-1)
## NOTE(review): the source column is spelled "energy.indeendence"; presumably this
## matches a misspelled header in the raw data -- confirm before renaming.
frame2$eqenergy.independence<-ifelse(frame2$energy.indeendence>3,1,-1)
frame2$eqavoid.waste<-ifelse(frame2$avoid.waste>3,1,-1)
frame2$eqcontrol.consume<-ifelse(frame2$control.consume>3,1,-1)
frame2$eqbuy.new.tech<-ifelse(frame2$buy.new.tech>3,1,-1)
frame2$eqeco.purchase<-ifelse(frame2$eco.purchase>3,1,-1)
frame2$eqswitch.products<-ifelse(frame2$switch.products>3,1,-1)
frame2$eqeco.appliance.buy<-ifelse(frame2$eco.appliance.buy>3,1,-1)
frame2$eqeco.equal.buy<-ifelse(frame2$eco.equal.buy>3,1,-1)
frame2$eqobsolete.buy<-ifelse(frame2$obsolete.buy>3,1,-1)
frame2$eqdelicate.nature<-ifelse(frame2$delicate.nature>3,1,-1)
## Reverse-scored.  Unlike the other items this overwrites the raw column in
## frame2 (no "eq" prefix); the eqsum/eqsum2 totals reference it by this name.
frame2$use.plants.animals<-ifelse(frame2$use.plants.animals>3,-1,1)
frame2$eqspaceship.earth<-ifelse(frame2$spaceship.earth>3,1,-1)
frame2$eqgrowth.limits<-ifelse(frame2$growth.limits>3,1,-1)
## Trust items use a cut-point of >2 rather than >3.
frame2$eqfederal.trust<-ifelse(frame2$federal.trust>2,1,-1)
frame2$eqlocal.trust<-ifelse(frame2$local.trust>2,1,-1)
frame2$eqscientists.trust<-ifelse(frame2$scientists.trust>2,1,-1)
frame2$eqcommunity.trust<-ifelse(frame2$community.trust>2,1,-1)
frame2$eqfamily.trust<-ifelse(frame2$family.trust>2,1,-1)
frame2$eqfriends.trust<-ifelse(frame2$friends.trust>2,1,-1)
frame2$eqco.workers.trust<-ifelse(frame2$co.workers.trust>2,1,-1)
frame2$eqtrust.utility<-ifelse(frame2$trust.utility>2,1,-1)
## Reverse-scored items: a response above 3 maps to -1.
frame2$eqtoo.complicated<-ifelse(frame2$too.complicated>3,-1,1)
frame2$eqnot.capable<-ifelse(frame2$not.capable>3,-1,1)
frame2$equnexpected.problems<-ifelse(frame2$unexpected.problems>3,-1,1)
frame2$eqmake.plans.work<-ifelse(frame2$make.plans.work>3,1,-1)
frame2$eqright.to.work<-ifelse(frame2$right.to.work>3,1,-1)
frame2$eqnew.writers<-ifelse(frame2$new.writers>3,1,-1)
frame2$eqenjoy.mental.challenge<-ifelse(frame2$enjoy.mental.challenge>3,1,-1)
frame2$eqlike.work.problem<-ifelse(frame2$like.work.problem>3,1,-1)
frame2$eqnew.words<-ifelse(frame2$new.words>3,1,-1)
frame2$eqeager.to.know<-ifelse(frame2$eager.to.know>3,1,-1)
frame2$eqnew.subject<-ifelse(frame2$new.subject>3,1,-1)
## The target name "eqncare.possessions" (extra "n") is what eqsum2 references;
## keep the two in sync if it is ever renamed.
frame2$eqncare.possessions<-ifelse(frame2$care.possessions>3,1,-1)
frame2$eqno.throw.away<-ifelse(frame2$no.throw.away>3,1,-1)
frame2$eqbetter.use.resources<-ifelse(frame2$better.use.resources>3,1,-1)
frame2$eqreuse.items<-ifelse(frame2$reuse.items>3,1,-1)
frame2$eqcareful.spending<-ifelse(frame2$careful.spending>3,1,-1)
frame2$eqdiscipline.money<-ifelse(frame2$discipline.money>3,1,-1)
frame2$eqwait.purchase<-ifelse(frame2$wait.purchase>3,1,-1)
frame2$eqresist.buying<-ifelse(frame2$resist.buying>3,1,-1)
## Yes/No items: "Yes" maps to +1, any other value to -1.
frame2$eqcfl<-ifelse(frame2$cfl=="Yes",1,-1)
frame2$eqtracking.device<-ifelse(frame2$tracking.device=="Yes",1,-1)
frame2$eqefficient.appliances<-ifelse(frame2$efficient.appliances=="Yes",1,-1)
frame2$eqinsulated.home<-ifelse(frame2$insulated.home=="Yes",1,-1)
frame2$eqflu.shot<-ifelse(frame2$flu.shot=="Yes",1,-1)
frame2$eqrecycled<-ifelse(frame2$recycled=="Yes",1,-1)
frame2$eqfour.oh.one.k<-ifelse(frame2$four.oh.one.k=="Yes",1,-1)
frame2$eqlibrary<-ifelse(frame2$library=="Yes",1,-1)
frame2$eqprize.drawing<-ifelse(frame2$prize.drawing=="Yes",1,-1)
frame2$eqdonated.money<-ifelse(frame2$donated.money=="Yes",1,-1)
frame2$eqdonated.time<-ifelse(frame2$donated.time=="Yes",1,-1)
frame2$eqlottery.ticket<-ifelse(frame2$lottery.ticket=="Yes",1,-1)
frame2$eqhave.smart.meter<-ifelse(frame2$have.smart.meter=="Yes",1,-1)
frame2$eqheard.of.ihd<-ifelse(frame2$heard.of.ihd=="Yes",1,-1)
## In-home-display (IHD) attitude items and willingness to continue the survey,
## recoded to unit weights (+1 above the cut-point / "Continue", -1 otherwise).
frame2$eqihd.save.electricity<-ifelse(frame2$ihd.save.electricity>3,1,-1)
frame2$eqihd.save.money<-ifelse(frame2$ihd.save.money>3,1,-1)
## Fixed: eqihd.enjoy was previously derived from ihd.learn, so the column named
## "enjoy" silently carried the "learn" item.  It now uses ihd.enjoy, the item it is
## named after.  NOTE(review): this changes eqsum/eqsum2; if ihd.learn was meant to
## enter the equal-weights sum as well, add a separate eqihd.learn column.
frame2$eqihd.enjoy<-ifelse(frame2$ihd.enjoy>3,1,-1)
frame2$eqcontinue.survey<-ifelse(frame2$continue.survey=="Continue",1,-1)
#frame2$eqcontact.blackouts<-ifelse(frame2$contact.blackouts=="yes",-1,1)
#frame2$eqcontact.billing<-ifelse(frame2$contact.billing=="yes",-1,1)
#frame2$eqcontact.provision<-ifelse(frame2$contact.provision=="yes",-1,1)
#frame2$eqnever.contacted<-ifelse(frame2$never.contacted=="no",1,-1)
#frame2$eqprevious.program<-ifelse(frame2$previous.program=="Yes",1,-1)
#frame2$eqmodify.environment<-ifelse(frame2$modify.environment>3,-1,1)
#frame2$rule.nature<-ifelse(frame2$rule.nature>3,-1,1)
#frame2$eqmore.choices<-ifelse(frame2$more.choices>3,1,-1)
#frame2$more.electricity<-frame2$more.electriciy
#frame2$eqmore.electricity<-ifelse(frame2$more.electricity>3,1,-1)
#frame2$eqsave.money<-ifelse(frame2$save.money>3,1,-1)
#frame2$eqreliability<-ifelse(frame2$reliability>3,1,-1)
#frame2$eqsatisfied.utility<-ifelse(frame2$satisfied.utility>3,1,-1)
## Equal-weights totals.  Each total is the element-wise sum of the listed
## +1/-1 columns of frame2, added left to right in the order given.
## eqsum: the reduced item set used by the equal-weights logistic model.
eqsum.items<-c("eqmore.reduce","eqcare.environment","eqprotect.environment",
               "eqcontrol","eqenergy.independence","eqavoid.waste",
               "eqcontrol.consume","eqbuy.new.tech","eqeco.purchase",
               "eqdelicate.nature","use.plants.animals","eqspaceship.earth",
               "eqgrowth.limits","eqtrust.utility","eqcfl","eqtracking.device",
               "eqefficient.appliances","eqinsulated.home","eqflu.shot",
               "eqrecycled","eqfour.oh.one.k","eqlibrary","eqprize.drawing",
               "eqdonated.money","eqdonated.time","eqlottery.ticket",
               "eqhave.smart.meter","eqheard.of.ihd","eqihd.save.electricity",
               "eqihd.save.money","eqihd.enjoy","eqcontinue.survey")
frame2$eqsum<-Reduce("+",frame2[eqsum.items])

## eqsum2: the larger item set.
eqsum2.items<-c("eqmore.reduce","eqcare.environment","eqconsistent.recycle",
                "eqactive.community","eqprotect.environment","eqcontrol",
                "eqenergy.independence","eqavoid.waste","eqcontrol.consume",
                "eqbuy.new.tech","eqeco.purchase","eqswitch.products",
                "eqeco.appliance.buy","eqeco.equal.buy","eqobsolete.buy",
                "eqdelicate.nature","use.plants.animals","eqspaceship.earth",
                "eqgrowth.limits","eqfederal.trust","eqlocal.trust",
                "eqscientists.trust","eqcommunity.trust","eqfamily.trust",
                "eqfriends.trust","eqco.workers.trust","eqtrust.utility",
                "eqtoo.complicated","eqnot.capable","equnexpected.problems",
                "eqmake.plans.work","eqright.to.work","eqnew.writers",
                "eqenjoy.mental.challenge","eqlike.work.problem","eqnew.words",
                "eqeager.to.know","eqnew.subject","eqncare.possessions",
                "eqno.throw.away","eqbetter.use.resources","eqreuse.items",
                "eqcareful.spending","eqwait.purchase","eqresist.buying",
                "eqcfl","eqtracking.device","eqefficient.appliances",
                "eqinsulated.home","eqflu.shot","eqrecycled","eqfour.oh.one.k",
                "eqlibrary","eqprize.drawing","eqdonated.money","eqdonated.time",
                "eqlottery.ticket","eqhave.smart.meter","eqheard.of.ihd",
                "eqihd.save.electricity","eqihd.save.money","eqihd.enjoy",
                "eqcontinue.survey","eqdiscipline.money")
frame2$eqsum2<-Reduce("+",frame2[eqsum2.items])

##Equal Weights Test##
## Logistic regression of enroll on the equal-weights total (eqsum), its 10-fold
## cross-validation via cv.glm, and a confusion table at a 0.5 probability threshold.
## cv.glm and invlogit are not defined in this chunk (presumably from the boot and
## arm packages loaded earlier -- confirm).
glmeq.2<-glm(enroll~eqsum,data=frame2,family=binomial(link="logit"))
glmcveq.2<-cv.glm(frame2,glmeq.2,K=10)
## Rows = observed enrollment (0/1), columns = predicted (0/1).
eqm.2<-table(ifelse(frame2$enroll=="Yes",1,0),ifelse(invlogit(predict(glmeq.2,frame2))>0.5,1,0))
## Cells 2 and 3 (column-major) are the off-diagonal, i.e. misclassified, counts.
## This indexing assumes a full 2x2 table (both classes predicted at least once).
miss1.2<-eqm.2[2]+eqm.2[3]
## Bootstrap: refit the equal-weights model on N resamples of frame2 (N is defined
## outside this chunk) and count misclassifications for each refit.
q<-frame2
miss<-c()
for(i in 1:N){
q1<-q[sample(nrow(q),replace=T),]
glmeq<-glm(enroll~eqsum,data=q1,family=binomial(link="logit"))
## NOTE(review): the refit model is scored on `frame`, not `frame2`.  `frame` is
## defined outside this chunk (presumably the first study's data); confirm that
## scoring on it is intended here.
miss[i]<-sum(abs(ifelse(frame$enroll=="Yes",1,0)-ifelse(invlogit(predict(glmeq,frame))>0.5,1,0)))
}
## Marginal totals of the confusion table: eqm1/eqm2 are the two column
## (predicted) totals, eqm3/eqm4 the two row (observed) totals.
eqm1<-eqm.2[1]+eqm.2[2]
eqm2<-eqm.2[3]+eqm.2[4]
eqm3<-eqm.2[1]+eqm.2[3]
eqm4<-eqm.2[2]+eqm.2[4]
## Square root of the product of the four marginals (presumably the denominator
## of a phi coefficient computed elsewhere -- confirm).
eqq.2<-sqrt(eqm1*eqm2*eqm3*eqm4)
####
####
##Recruitment Approaches##
## Enrollment probability per recruitment condition: a no-intercept logistic
## regression gives one coefficient per level of `condition`, and the inverse
## logit turns each into a probability.  The model is refit on 200 bootstrap
## resamples of vmoo2, storing the first five condition probabilities per resample.
glm <- glm(enroll ~ condition - 1, data = vmoo2, family = binomial(link = "logit"))
qe <- vmoo2
scientists.subscription <- c()
scientists.m <- c()
scientists.doe.m <- c()
scientists.pepco.m <- c()
scientists.pepco.offers.m <- c()
for (i in 1:200) {
  q1 <- qe[sample(nrow(qe), replace = T), ]
  glm <- glm(enroll ~ condition - 1, data = q1, family = binomial(link = "logit"))
  # Back-transform all coefficients once, then store them by position.
  recruit.boot.p <- invlogit(coef(glm))
  scientists.subscription[i] <- recruit.boot.p[1]
  scientists.m[i] <- recruit.boot.p[2]
  scientists.doe.m[i] <- recruit.boot.p[3]
  scientists.pepco.m[i] <- recruit.boot.p[4]
  scientists.pepco.offers.m[i] <- recruit.boot.p[5]
}
##
###TTB Test###
## Single-predictor benchmark: a logistic model of enroll on ihd.enjoy fitted to
## `frame` (defined outside this chunk), scored on the complete cases of vmoo2
## at a 0.5 probability threshold.
vmoo2.na <- na.omit(vmoo2)
ttb.test <- glm(enroll ~ ihd.enjoy, data = frame, family = binomial(link = "logit"))
a <- ifelse(vmoo2.na$enroll == "Yes", 1, 0)
b <- ifelse(invlogit(predict(ttb.test, vmoo2.na)) > 0.5, 1, 0)
ttbmiss.test <- sum(abs(a - b)) / length(vmoo2.na$enroll)
##Recursive Partitioning Test##
vmoo2.d <- na.omit(vmoo2)
## Misclassification rate of a fitted classification tree on vmoo2: class 1 is
## predicted when the second column of predict() exceeds 0.5.
rp.test.miss <- function(tree) {
  observed <- ifelse(vmoo2$enroll == "Yes", 1, 0)
  predicted <- ifelse(predict(tree, vmoo2)[, 2] > 0.5, 1, 0)
  sum(abs(observed - predicted)) / length(vmoo2$enroll)
}
## Trees fitted to `vmoo` (defined outside this chunk) with selected columns
## removed by position, then scored on vmoo2.
rp1 <- rpart(enroll ~ ., data = vmoo[, -c(1,5,7,8,16,20,22,24,30,31,36,38:42,64,66)])
rpmiss.test <- rp.test.miss(rp1)

## Same as rp1 but additionally dropping columns 57:60.
rp1.noihd <- rpart(enroll ~ ., data = vmoo[, -c(1,5,7,8,16,20,22,24,30,31,36,38:42,57:60,64,66)])
rpmiss.test.noihd <- rp.test.miss(rp1.noihd)

## Trees fitted to vmoo2 itself (first column dropped); error measured on the same data.
rp2 <- rpart(enroll ~ ., data = vmoo2[, -1])
rp2miss.test <- rp.test.miss(rp2)
## Variant that also drops columns 62:65 and 93.
rp2.noihd <- rpart(enroll ~ ., data = vmoo2[, -c(1,62:65,93)])
rp2.noihd.miss <- rp.test.miss(rp2.noihd)

## 10-fold cross-validated misclassification rate of the classification tree.
## Every complete case of vmoo2 gets a random fold label 1..10; for each fold the
## tree is fitted on the other folds and scored on the held-out fold.
## fold.cart2.miss[j] is the misclassification rate on held-out fold j.
fold.cart2.miss<-c()
q1<-na.omit(vmoo2)
size<-length(q1[,1])
fold.d<-c()
for(i in 1:size){
fold.d[i]<-sample(c(1:10),1,replace=FALSE)
}
q1<-cbind(q1,fold.d)
for(j in 1:max(fold.d)){
## Rows belonging to the held-out fold.
in.fold<-q1$fold.d==j
## Fixed: the training rows were selected with q1[-c(q1$fold.d==j),-1].  Negating a
## logical vector yields 0/-1 subscripts, which drops at most row 1, so the held-out
## fold stayed in the training data and the "cross-validated" error was really a
## resubstitution error.  Select the training rows with a logical mask instead.
cv.train<-q1[!in.fold,-1]
## The random fold label must not be offered to the tree as a predictor.
cv.train$fold.d<-NULL
## The fold trees are stored in rp2.fold so that the full-sample tree rp2 fitted
## earlier in this chunk is not overwritten by a partial-data fit.
rp2.fold<-rpart(enroll~.,data=cv.train)
a<-ifelse(q1$enroll[in.fold]=="Yes",1,0)
b<-ifelse(predict(rp2.fold,q1[in.fold,])[,2]>0.5,1,0)
fold.cart2.miss[j]<-sum(abs(a-b))/sum(in.fold)}
####
###GLM Test###
## Multi-predictor logistic regression fitted to `vmoo` (defined outside this chunk;
## presumably the earlier study's data -- confirm) and scored on vmoo2.d at a 0.5
## probability threshold.  glmiss.test is the resulting misclassification rate.
vmoo2.d<-na.omit(vmoo2.d)
glm1.test<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+ihd.enjoy+ihd.save.money+ihd.save.electricity+use.plants.animals+care.environment+delicate.nature+growth.limits+spaceship.earth,data=vmoo,family=binomial(link="logit"))
## `c` and `d` are plain variables here (observed / predicted 0-1 vectors); calls to
## the base function c() still resolve to the function.
c<-ifelse(vmoo2.d$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm1.test,vmoo2.d))>0.5,1,0) 
glmiss.test<-sum(abs(c-d))/length(vmoo2.d$enroll)
####
##New GLM##
## Logistic regression fitted directly to vmoo2 with a different predictor set.
glm2<-glm(enroll~ihd.enjoy+ihd.save.money+ihd.save.electricity+ihd.learn+local.trust+scientists.trust+trust.utility+active.community+control.consume+use.plants.animals+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=vmoo2,family=binomial(link="logit"))
#glm2cv<-cv.glm(vmoo2,glm2,K=10)

## Bootstrap of the factor-score model: refit on N resamples of frame.fac (both N
## and frame.fac are defined outside this chunk) and count, for each refit, the
## misclassifications on the full frame.fac.  glmiss.fac holds counts, not rates.
qr<-frame.fac
glmiss.fac<-c()
for(i in 1:N){
q1<-qr[sample(nrow(qr),replace=T),]
glm1.fac<-glm(enroll~more.reduce+avoid.waste+buy.new.tech+eco.purchase+hours.home+care.environment+IHD1.fac+NEP1.fac+NEP2.fac,data=q1,family=binomial(link="logit"))
c<-ifelse(frame.fac$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm1.fac,frame.fac))>0.5,1,0) 
glmiss.fac[i]<-sum(abs(c-d))}

#glm2.fac<-glm(enroll~active.community+control.consume+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration+IHD.Fac+NEP.Fac+Trust.Fac+Buy.Fac+Eff.Fac+Cur.Fac,data=vmoo2.fac,family=binomial(link="logit"))                 
###
@ 


<<exp2factors,fig=false,echo=FALSE,results=hide>>=
#Classification Tree (no IHD) & $\mathbf{\Sexpr{ztrunc(rpmiss1.test)}}$ & \Sexpr{ztrunc(22/142)} & \Sexpr{ztrunc(mean(fold.cart.miss1))} & $\mathbf{\Sexpr{ztrunc(mean(rpmiss1)/142)}}$ \\
##IHD Psychometrics##
## Four in-home-display items (columns 1-4): enjoy, save money, save electricity, learn.
## Computes pairwise correlations, alpha overall and with each item dropped (alpha()
## is defined outside this chunk and takes a vector of inter-item correlations),
## corrected item-total correlations, and a principal components analysis.
IHD2 <- cbind(vmoo2$ihd.enjoy, vmoo2$ihd.save.money, vmoo2$ihd.save.electricity, vmoo2$ihd.learn)
ihd.pair.cor <- function(i, j) cor(IHD2[, i], IHD2[, j], use = "complete")
r12 <- ihd.pair.cor(1, 2)
r13 <- ihd.pair.cor(1, 3)
r14 <- ihd.pair.cor(1, 4)
r23 <- ihd.pair.cor(2, 3)
r24 <- ihd.pair.cor(2, 4)
r34 <- ihd.pair.cor(3, 4)
corr <- c(r12, r13, r14, r23, r24, r34)
alpha.ov.IHD2 <- alpha(corr)
## alpha.k: alpha with item k removed (only correlations not involving item k).
alpha.1.IHD2 <- alpha(corr[4:6])
alpha.2.IHD2 <- alpha(corr[c(2, 3, 6)])
alpha.3.IHD2 <- alpha(corr[c(1, 3, 5)])
alpha.4.IHD2 <- alpha(corr[c(1, 2, 4)])
## Item matrix with the overall total appended as a fifth column.
total.IHD2 <- IHD2
t.ov.IHD2 <- IHD2[, 1] + IHD2[, 2] + IHD2[, 3] + IHD2[, 4]
total.IHD2 <- cbind(total.IHD2, t.ov.IHD2)
## Corrected item-total correlations: each item against the sum of the other three.
it1.IHD2 <- cor(total.IHD2[, 1], IHD2[, 2] + IHD2[, 3] + IHD2[, 4], use = "complete")
it2.IHD2 <- cor(total.IHD2[, 2], IHD2[, 1] + IHD2[, 3] + IHD2[, 4], use = "complete")
it3.IHD2 <- cor(total.IHD2[, 3], IHD2[, 1] + IHD2[, 2] + IHD2[, 4], use = "complete")
it4.IHD2 <- cor(total.IHD2[, 4], IHD2[, 1] + IHD2[, 2] + IHD2[, 3], use = "complete")
## PCA on the correlation matrix; vars/cumu are the (cumulative) proportions of variance.
fit.IHD2 <- princomp(IHD2, cor = TRUE)
SF.IHD2 <- summary(fit.IHD2)
L.IHD2 <- loadings(fit.IHD2)
vars.IHD2 <- SF.IHD2$sdev^2
vars.IHD2 <- vars.IHD2 / sum(vars.IHD2)
cumu.IHD2 <- cumsum(vars.IHD2)
##NEP Psychometrics##
## Four NEP items (columns 1-4): use.plants.animals (passed through reverse.code
## with key -1), delicate.nature, growth.limits, spaceship.earth.
## Same analysis as for the IHD items: pairwise correlations, alpha overall and with
## each item dropped, corrected item-total correlations, and a PCA.
NEP2 <- cbind(reverse.code(-1, vmoo2$use.plants.animals), vmoo2$delicate.nature,
              vmoo2$growth.limits, vmoo2$spaceship.earth)
nep.pair.cor <- function(i, j) cor(NEP2[, i], NEP2[, j], use = "complete")
r12 <- nep.pair.cor(1, 2)
r13 <- nep.pair.cor(1, 3)
r14 <- nep.pair.cor(1, 4)
r23 <- nep.pair.cor(2, 3)
r24 <- nep.pair.cor(2, 4)
r34 <- nep.pair.cor(3, 4)
corr <- c(r12, r13, r14, r23, r24, r34)
alpha.ov.NEP2 <- alpha(corr)
## alpha.k: alpha with item k removed.
alpha.1.NEP2 <- alpha(corr[c(4:6)])
alpha.2.NEP2 <- alpha(corr[c(2, 3, 6)])
alpha.3.NEP2 <- alpha(corr[c(1, 3, 5)])
alpha.4.NEP2 <- alpha(corr[c(1, 2, 4)])
## Item matrix with the overall total appended as a fifth column.
total.NEP2 <- NEP2
t.ov.NEP2 <- NEP2[, 1] + NEP2[, 2] + NEP2[, 3] + NEP2[, 4]
total.NEP2 <- cbind(total.NEP2, t.ov.NEP2)
## Corrected item-total correlations: each item against the sum of the other three.
it1.NEP2 <- cor(total.NEP2[, 1], NEP2[, 2] + NEP2[, 3] + NEP2[, 4], use = "complete")
it2.NEP2 <- cor(total.NEP2[, 2], NEP2[, 1] + NEP2[, 3] + NEP2[, 4], use = "complete")
it3.NEP2 <- cor(total.NEP2[, 3], NEP2[, 1] + NEP2[, 2] + NEP2[, 4], use = "complete")
it4.NEP2 <- cor(total.NEP2[, 4], NEP2[, 1] + NEP2[, 2] + NEP2[, 3], use = "complete")
## PCA on the correlation matrix; vars/cumu are the (cumulative) proportions of variance.
fit.NEP2 <- princomp(NEP2, cor = TRUE)
SF.NEP2 <- summary(fit.NEP2)
L.NEP2 <- loadings(fit.NEP2)
vars.NEP2 <- SF.NEP2$sdev^2
vars.NEP2 <- vars.NEP2 / sum(vars.NEP2)
cumu.NEP2 <- cumsum(vars.NEP2)
#plot(fit,type="lines",main="Scree Plot")
###Eco Purchases factors###
## Six eco-purchasing items (columns 1-6): buy.new.tech, eco.purchase,
## switch.products, eco.appliance.buy, eco.equal.buy, obsolete.buy (passed through
## reverse.code with key -1).  Computes pairwise correlations, alpha overall / for
## the reduced 4-item set / with each item dropped (alpha() is defined outside this
## chunk), corrected item-total correlations, and a PCA of the reduced item set.
SC.buy<-cbind(vmoo2$buy.new.tech,vmoo2$eco.purchase)
SC.buy<-cbind(SC.buy,vmoo2$switch.products)
SC.buy<-cbind(SC.buy,vmoo2$eco.appliance.buy)
SC.buy<-cbind(SC.buy,vmoo2$eco.equal.buy)
SC.buy<-cbind(SC.buy,reverse.code(-1,vmoo2$obsolete.buy))
r12<-cor(SC.buy[,1],SC.buy[,2],use="complete")
r13<-cor(SC.buy[,1],SC.buy[,3],use="complete")
r14<-cor(SC.buy[,1],SC.buy[,4],use="complete")
r15<-cor(SC.buy[,1],SC.buy[,5],use="complete")
r16<-cor(SC.buy[,1],SC.buy[,6],use="complete")
r23<-cor(SC.buy[,2],SC.buy[,3],use="complete")
r24<-cor(SC.buy[,2],SC.buy[,4],use="complete")
r25<-cor(SC.buy[,2],SC.buy[,5],use="complete")
r26<-cor(SC.buy[,2],SC.buy[,6],use="complete")
r34<-cor(SC.buy[,3],SC.buy[,4],use="complete")
r35<-cor(SC.buy[,3],SC.buy[,5],use="complete")
r36<-cor(SC.buy[,3],SC.buy[,6],use="complete")
r45<-cor(SC.buy[,4],SC.buy[,5],use="complete")
## Fixed: the item 4-6 correlation was assigned to r45, which overwrote the item 4-5
## correlation (used in both corr and corr2) and left r46 undefined or stale.
r46<-cor(SC.buy[,4],SC.buy[,6],use="complete")
r56<-cor(SC.buy[,5],SC.buy[,6],use="complete")
## All 15 pairwise correlations, ordered r12..r16, r23..r26, r34..r36, r45, r46, r56.
corr<-c(r12,r13,r14,r15,r16,r23,r24,r25,r26,r34,r35,r36,r45,r46,r56)
## Reduced set: correlations among items 2-5 only.
corr2<-c(r23,r24,r25,r34,r35,r45)
alpha.ov.buy<-alpha(corr)
alpha.ov.buy.red<-alpha(corr2)
## alpha.k: alpha with item k removed (indices pick the correlations not involving item k).
alpha.1.buy<-alpha(corr[6:15])
alpha.2.buy<-alpha(corr[c(2,3,4,5,10:15)])
alpha.3.buy<-alpha(corr[c(1,3,4,5,7,8,9,13:15)])
alpha.4.buy<-alpha(corr[c(1,2,4,5,6,8,9,11,12,15)])
alpha.5.buy<-alpha(corr[c(1,2,3,5,6,7,9,10,12,14)])
alpha.6.buy<-alpha(corr[c(1,2,3,4,6,7,8,10,11,13)])
## Item matrix with the overall total appended as a seventh column.
total.buy<-SC.buy
t.ov.buy<-c(SC.buy[,1]+SC.buy[,2]+SC.buy[,3]+SC.buy[,4]+SC.buy[,5]+SC.buy[,6])
total.buy<-cbind(total.buy,t.ov.buy)
## Corrected item-total correlations: each item against the sum of the other five.
it1.buy<-cor(total.buy[,1],c(SC.buy[,2]+SC.buy[,3]+SC.buy[,4]+SC.buy[,5]+SC.buy[,6]),use="complete")
it2.buy<-cor(total.buy[,2],c(SC.buy[,1]+SC.buy[,3]+SC.buy[,4]+SC.buy[,5]+SC.buy[,6]),use="complete")
it3.buy<-cor(total.buy[,3],c(SC.buy[,1]+SC.buy[,2]+SC.buy[,4]+SC.buy[,5]+SC.buy[,6]),use="complete")
it4.buy<-cor(total.buy[,4],c(SC.buy[,1]+SC.buy[,2]+SC.buy[,3]+SC.buy[,5]+SC.buy[,6]),use="complete")
it5.buy<-cor(total.buy[,5],c(SC.buy[,1]+SC.buy[,2]+SC.buy[,3]+SC.buy[,4]+SC.buy[,6]),use="complete")
it6.buy<-cor(total.buy[,6],c(SC.buy[,1]+SC.buy[,2]+SC.buy[,3]+SC.buy[,4]+SC.buy[,5]),use="complete")
## From here on SC.buy holds only items 2-5 (complete cases); the PCA uses that reduced set.
SC.buy<-na.omit(SC.buy[,-c(1,6)])
fit.buy<-princomp(SC.buy,cor=TRUE)
SF.buy<-summary(fit.buy)
L.buy<-loadings(fit.buy)
## Proportion and cumulative proportion of variance per component.
vars.buy<-SF.buy$sdev^2
vars.buy<-vars.buy/sum(vars.buy)
cumu.buy<-cumsum(vars.buy)
##Trust Psychometrics##
## Eight trust items (columns 1-8): federal, local, scientists, utility, community,
## family, friends, co-workers.  Produces the globals r<i><j>.trust2, corr, corr2,
## alpha.ov.trust2, alpha.ov.trust2.red, alpha.<k>.trust2, it<k>.trust2 and the PCA
## objects.  alpha() is defined outside this chunk.
trust2 <- cbind(vmoo2$federal.trust, vmoo2$local.trust, vmoo2$scientists.trust,
                vmoo2$trust.utility, vmoo2$community.trust, vmoo2$family.trust,
                vmoo2$friends.trust, vmoo2$co.workers.trust)
## Pairwise correlations for every i < j, generated in the order r12..r18, r23..r28,
## ..., r78.  Each is stored under its own name and appended to corr in that order.
corr <- c()
for (t.i in 1:7) {
  for (t.j in (t.i + 1):8) {
    t.r <- cor(trust2[, t.i], trust2[, t.j], use = "complete")
    assign(paste("r", t.i, t.j, ".trust2", sep = ""), t.r)
    corr <- c(corr, t.r)
  }
}
## Reduced set: correlations among items 2, 3, 4 and 8.
corr2 <- c(r23.trust2, r24.trust2, r28.trust2, r34.trust2, r38.trust2, r48.trust2)
alpha.ov.trust2 <- alpha(corr)
alpha.ov.trust2.red <- alpha(corr2)
## First and second item index of each entry of corr, in the same order as corr.
t.pair.i <- rep(1:7, times = 7:1)
t.pair.j <- unlist(lapply(1:7, function(k) (k + 1):8))
## alpha.<k>.trust2: alpha with item k removed, i.e. over the correlations that do
## not involve item k (kept in their original order).
for (t.k in 1:8) {
  assign(paste("alpha.", t.k, ".trust2", sep = ""),
         alpha(corr[t.pair.i != t.k & t.pair.j != t.k]))
}
## Item matrix with the overall total appended as a ninth column.
total.trust2 <- trust2
t.ov.trust2 <- trust2[, 1] + trust2[, 2] + trust2[, 3] + trust2[, 4] +
  trust2[, 5] + trust2[, 6] + trust2[, 7] + trust2[, 8]
total.trust2 <- cbind(total.trust2, t.ov.trust2)
## it<k>.trust2: corrected item-total correlation of item k against the sum of the
## other seven items (added in increasing column order).
for (t.k in 1:8) {
  t.rest <- Reduce("+", lapply(setdiff(1:8, t.k), function(m) trust2[, m]))
  assign(paste("it", t.k, ".trust2", sep = ""),
         cor(total.trust2[, t.k], t.rest, use = "complete"))
}
## PCA on the reduced item set (columns 1, 5, 6 and 7 removed).
fit.trust2 <- princomp(trust2[, -c(1, 5, 6, 7)], cor = TRUE)
SF.trust2 <- summary(fit.trust2)
L.trust2 <- loadings(fit.trust2)
## Proportion and cumulative proportion of variance per component.
vars.trust2 <- SF.trust2$sdev^2
vars.trust2 <- vars.trust2 / sum(vars.trust2)
cumu.trust2 <- cumsum(vars.trust2)
##
##Volunteer Behaviors##
## Chi-squared tests of association between enrollment and two reported behaviours.
CS.1 <- chisq.test(vmoo2$protest.demonstration, vmoo2$enroll)
CS.2 <- chisq.test(vmoo2$cfl, vmoo2$enroll)
##Self-Efficacy##
## Five self-efficacy items (columns 1-5): too.complicated, right.to.work,
## not.capable, unexpected.problems, make.plans.work.  Items 1, 3 and 4 are passed
## through reverse.code with key -1.  alpha() is defined outside this chunk.
SC.eff <- cbind(reverse.code(-1, vmoo2$too.complicated), vmoo2$right.to.work,
                reverse.code(-1, vmoo2$not.capable),
                reverse.code(-1, vmoo2$unexpected.problems),
                vmoo2$make.plans.work)
eff.pair.cor <- function(i, j) cor(SC.eff[, i], SC.eff[, j], use = "complete")
r12 <- eff.pair.cor(1, 2)
r13 <- eff.pair.cor(1, 3)
r14 <- eff.pair.cor(1, 4)
r15 <- eff.pair.cor(1, 5)
r23 <- eff.pair.cor(2, 3)
r24 <- eff.pair.cor(2, 4)
r25 <- eff.pair.cor(2, 5)
r34 <- eff.pair.cor(3, 4)
r35 <- eff.pair.cor(3, 5)
r45 <- eff.pair.cor(4, 5)
corr <- c(r12, r13, r14, r15, r23, r24, r25, r34, r35, r45)
## Reduced set: correlations among items 1, 3, 4 and 5 (item 2 excluded).
corr2 <- c(r13, r14, r15, r34, r35, r45)
alpha.ov.eff <- alpha(corr)
alpha.ov.eff.red <- alpha(corr2)
## alpha.k: alpha with item k removed.
alpha.1.eff <- alpha(corr[5:10])
alpha.2.eff <- alpha(corr[c(2, 3, 4, 8, 9, 10)])
alpha.3.eff <- alpha(corr[c(1, 3, 4, 6, 7, 10)])
alpha.4.eff <- alpha(corr[c(1, 2, 4, 5, 7, 9)])
alpha.5.eff <- alpha(corr[c(1, 2, 3, 5, 6, 8)])
## Item matrix with the overall total appended as a sixth column.
total.eff <- SC.eff
t.ov.eff <- SC.eff[, 1] + SC.eff[, 2] + SC.eff[, 3] + SC.eff[, 4] + SC.eff[, 5]
total.eff <- cbind(total.eff, t.ov.eff)
## Corrected item-total correlations: each item against the sum of the other four.
it1.eff <- cor(total.eff[, 1], SC.eff[, 2] + SC.eff[, 3] + SC.eff[, 4] + SC.eff[, 5], use = "complete")
it2.eff <- cor(total.eff[, 2], SC.eff[, 1] + SC.eff[, 3] + SC.eff[, 4] + SC.eff[, 5], use = "complete")
it3.eff <- cor(total.eff[, 3], SC.eff[, 1] + SC.eff[, 2] + SC.eff[, 4] + SC.eff[, 5], use = "complete")
it4.eff <- cor(total.eff[, 4], SC.eff[, 1] + SC.eff[, 2] + SC.eff[, 3] + SC.eff[, 5], use = "complete")
it5.eff <- cor(total.eff[, 5], SC.eff[, 1] + SC.eff[, 2] + SC.eff[, 3] + SC.eff[, 4], use = "complete")
## PCA on complete cases with item 2 removed, matching the reduced set above.
SC.eff <- na.omit(SC.eff)
fit.eff <- princomp(SC.eff[, -2], cor = TRUE)
SF.eff <- summary(fit.eff)
L.eff <- loadings(fit.eff)
## Proportion and cumulative proportion of variance per component.
vars.eff <- SF.eff$sdev^2
vars.eff <- vars.eff / sum(vars.eff)
cumu.eff <- cumsum(vars.eff)
##Exploration##
## Six exploration items, in column order: new.writers,
## enjoy.mental.challenge, like.work.problem, new.words, eager.to.know,
## new.subject.
SC.ex<-cbind(vmoo2$new.writers,vmoo2$enjoy.mental.challenge)
SC.ex<-cbind(SC.ex,vmoo2$like.work.problem)
SC.ex<-cbind(SC.ex,vmoo2$new.words)
SC.ex<-cbind(SC.ex,vmoo2$eager.to.know)
SC.ex<-cbind(SC.ex,vmoo2$new.subject)
## All 15 pairwise correlations between the six items (rIJ = items I and J).
r12<-cor(SC.ex[,1],SC.ex[,2],use="complete")
r13<-cor(SC.ex[,1],SC.ex[,3],use="complete")
r14<-cor(SC.ex[,1],SC.ex[,4],use="complete")
r15<-cor(SC.ex[,1],SC.ex[,5],use="complete")
r16<-cor(SC.ex[,1],SC.ex[,6],use="complete")
r23<-cor(SC.ex[,2],SC.ex[,3],use="complete")
r24<-cor(SC.ex[,2],SC.ex[,4],use="complete")
r25<-cor(SC.ex[,2],SC.ex[,5],use="complete")
r26<-cor(SC.ex[,2],SC.ex[,6],use="complete")
r34<-cor(SC.ex[,3],SC.ex[,4],use="complete")
r35<-cor(SC.ex[,3],SC.ex[,5],use="complete")
r36<-cor(SC.ex[,3],SC.ex[,6],use="complete")
r45<-cor(SC.ex[,4],SC.ex[,5],use="complete")
## Fixed: the (4,6) correlation was assigned to r45 a second time, which
## overwrote the (4,5) value and left r46 (used in corr below) undefined or
## carried over from an earlier scale.
r46<-cor(SC.ex[,4],SC.ex[,6],use="complete")
r56<-cor(SC.ex[,5],SC.ex[,6],use="complete")
corr<-c(r12,r13,r14,r15,r16,r23,r24,r25,r26,r34,r35,r36,r45,r46,r56)
## Pairs among items 2, 3, 5 and 6 only, i.e. the columns kept for the PCA.
corr2<-c(r23,r25,r26,r35,r36,r56)
alpha.ov.ex<-alpha(corr)
## Fixed: the reduced-scale alpha was computed from corr (all six items),
## making it identical to alpha.ov.ex; it now uses corr2.
alpha.ov.ex.red<-alpha(corr2)
## alpha.k.ex: alpha() on the ten pairs that do not involve item k.
alpha.1.ex<-alpha(corr[6:15])
alpha.2.ex<-alpha(corr[c(2,3,4,5,10:15)])
alpha.3.ex<-alpha(corr[c(1,3,4,5,7,8,9,13:15)])
alpha.4.ex<-alpha(corr[c(1,2,4,5,6,8,9,11,12,15)])
alpha.5.ex<-alpha(corr[c(1,2,3,5,6,7,9,10,12,14)])
alpha.6.ex<-alpha(corr[c(1,2,3,4,6,7,8,10,11,13)])
## Total score appended as a seventh column, then item-rest correlations
## (each item against the sum of the other five).
total.ex<-SC.ex
t.ov.ex<-c(SC.ex[,1]+SC.ex[,2]+SC.ex[,3]+SC.ex[,4]+SC.ex[,5]+SC.ex[,6])
total.ex<-cbind(total.ex,t.ov.ex)
it1.ex<-cor(total.ex[,1],c(SC.ex[,2]+SC.ex[,3]+SC.ex[,4]+SC.ex[,5]+SC.ex[,6]),use="complete")
it2.ex<-cor(total.ex[,2],c(SC.ex[,1]+SC.ex[,3]+SC.ex[,4]+SC.ex[,5]+SC.ex[,6]),use="complete")
it3.ex<-cor(total.ex[,3],c(SC.ex[,1]+SC.ex[,2]+SC.ex[,4]+SC.ex[,5]+SC.ex[,6]),use="complete")
it4.ex<-cor(total.ex[,4],c(SC.ex[,1]+SC.ex[,2]+SC.ex[,3]+SC.ex[,5]+SC.ex[,6]),use="complete")
it5.ex<-cor(total.ex[,5],c(SC.ex[,1]+SC.ex[,2]+SC.ex[,3]+SC.ex[,4]+SC.ex[,6]),use="complete")
it6.ex<-cor(total.ex[,6],c(SC.ex[,1]+SC.ex[,2]+SC.ex[,3]+SC.ex[,4]+SC.ex[,5]),use="complete")
## PCA (correlation matrix) on complete cases, without columns 1 and 4.
SC.ex<-na.omit(SC.ex)
fit.ex<-princomp(SC.ex[,-c(1,4)],cor=TRUE)
SF.ex<-summary(fit.ex)
L.ex<-loadings(fit.ex)
## Proportion of variance per component and its cumulative sum.
vars.ex<-SF.ex$sdev^2
vars.ex<-vars.ex/sum(vars.ex)
cumu.ex<-cumsum(vars.ex)
##Frugality##
## Six frugality items, in column order: care.possessions, no.throw.away,
## better.use.resources, reuse.items, resist.buying, wait.purchase.
SC.fr<-cbind(vmoo2$care.possessions,vmoo2$no.throw.away)
SC.fr<-cbind(SC.fr,vmoo2$better.use.resources)
SC.fr<-cbind(SC.fr,vmoo2$reuse.items)
SC.fr<-cbind(SC.fr,vmoo2$resist.buying)
SC.fr<-cbind(SC.fr,vmoo2$wait.purchase)
## All 15 pairwise correlations between the six items (rIJ = items I and J).
r12<-cor(SC.fr[,1],SC.fr[,2],use="complete")
r13<-cor(SC.fr[,1],SC.fr[,3],use="complete")
r14<-cor(SC.fr[,1],SC.fr[,4],use="complete")
r15<-cor(SC.fr[,1],SC.fr[,5],use="complete")
r16<-cor(SC.fr[,1],SC.fr[,6],use="complete")
r23<-cor(SC.fr[,2],SC.fr[,3],use="complete")
r24<-cor(SC.fr[,2],SC.fr[,4],use="complete")
r25<-cor(SC.fr[,2],SC.fr[,5],use="complete")
r26<-cor(SC.fr[,2],SC.fr[,6],use="complete")
r34<-cor(SC.fr[,3],SC.fr[,4],use="complete")
r35<-cor(SC.fr[,3],SC.fr[,5],use="complete")
r36<-cor(SC.fr[,3],SC.fr[,6],use="complete")
r45<-cor(SC.fr[,4],SC.fr[,5],use="complete")
## Fixed: the (4,6) correlation was assigned to r45 a second time, which
## overwrote the (4,5) value and left r46 holding whatever an earlier scale
## had stored there.
r46<-cor(SC.fr[,4],SC.fr[,6],use="complete")
r56<-cor(SC.fr[,5],SC.fr[,6],use="complete")
corr<-c(r12,r13,r14,r15,r16,r23,r24,r25,r26,r34,r35,r36,r45,r46,r56)
## Fixed: the reduced set (pairs among items 1, 4, 5 and 6, the columns kept
## for the PCA) used to be assigned to corr, clobbering the full 15-pair
## vector so that alpha.ov.fr was really the reduced alpha and the
## leave-one-out index sets below ran past the end of a 6-element vector.
corr2<-c(r14,r15,r16,r45,r46,r56)
alpha.ov.fr<-alpha(corr)
alpha.ov.fr.red<-alpha(corr2)
## alpha.k.fr: alpha() on the ten pairs that do not involve item k.
alpha.1.fr<-alpha(corr[6:15])
alpha.2.fr<-alpha(corr[c(2,3,4,5,10:15)])
alpha.3.fr<-alpha(corr[c(1,3,4,5,7,8,9,13:15)])
alpha.4.fr<-alpha(corr[c(1,2,4,5,6,8,9,11,12,15)])
alpha.5.fr<-alpha(corr[c(1,2,3,5,6,7,9,10,12,14)])
alpha.6.fr<-alpha(corr[c(1,2,3,4,6,7,8,10,11,13)])
## Total score appended as a seventh column, then item-rest correlations
## (each item against the sum of the other five).
total.fr<-SC.fr
t.ov.fr<-c(SC.fr[,1]+SC.fr[,2]+SC.fr[,3]+SC.fr[,4]+SC.fr[,5]+SC.fr[,6])
total.fr<-cbind(total.fr,t.ov.fr)
it1.fr<-cor(total.fr[,1],c(SC.fr[,2]+SC.fr[,3]+SC.fr[,4]+SC.fr[,5]+SC.fr[,6]),use="complete")
it2.fr<-cor(total.fr[,2],c(SC.fr[,1]+SC.fr[,3]+SC.fr[,4]+SC.fr[,5]+SC.fr[,6]),use="complete")
it3.fr<-cor(total.fr[,3],c(SC.fr[,1]+SC.fr[,2]+SC.fr[,4]+SC.fr[,5]+SC.fr[,6]),use="complete")
it4.fr<-cor(total.fr[,4],c(SC.fr[,1]+SC.fr[,2]+SC.fr[,3]+SC.fr[,5]+SC.fr[,6]),use="complete")
it5.fr<-cor(total.fr[,5],c(SC.fr[,1]+SC.fr[,2]+SC.fr[,3]+SC.fr[,4]+SC.fr[,6]),use="complete")
it6.fr<-cor(total.fr[,6],c(SC.fr[,1]+SC.fr[,2]+SC.fr[,3]+SC.fr[,4]+SC.fr[,5]),use="complete")
## PCA (correlation matrix) on complete cases, without columns 2 and 3.
SC.fr<-na.omit(SC.fr)
fit.fr<-princomp(SC.fr[,-c(2,3)],cor=TRUE)
SF.fr<-summary(fit.fr)
L.fr<-loadings(fit.fr)
## Proportion of variance per component and its cumulative sum.
vars.fr<-SF.fr$sdev^2
vars.fr<-vars.fr/sum(vars.fr)
cumu.fr<-cumsum(vars.fr)
##Socializing##
## Three socializing items: close.friends, talk.two.weeks, party.gathering.
soc <- cbind(vmoo2$close.friends, vmoo2$talk.two.weeks, vmoo2$party.gathering)
## Pairwise correlation between columns i and j of soc.
pair.cor.soc <- function(i, j) cor(soc[, i], soc[, j], use = "complete")
r12 <- pair.cor.soc(1, 2)
r13 <- pair.cor.soc(1, 3)
r23 <- pair.cor.soc(2, 3)
corr <- c(r12, r13, r23)
alpha.ov.soc <- alpha(corr)
## Total score appended as a fourth column, then each item against the sum
## of the other two.
t.ov.soc <- c(soc[, 1] + soc[, 2] + soc[, 3])
total.soc <- cbind(soc, t.ov.soc)
it1.soc <- cor(total.soc[, 1], c(soc[, 2] + soc[, 3]), use = "complete")
it2.soc <- cor(total.soc[, 2], c(soc[, 1] + soc[, 3]), use = "complete")
it3.soc <- cor(total.soc[, 3], c(soc[, 1] + soc[, 2]), use = "complete")
## PCA on the correlation matrix of all three items.
fit.soc <- princomp(soc, cor = TRUE)
SF.soc <- summary(fit.soc)
L.soc <- loadings(fit.soc)
## Proportion of variance per component and its cumulative sum.
vars.soc <- SF.soc$sdev^2
vars.soc <- vars.soc / sum(vars.soc)
cumu.soc <- cumsum(vars.soc)
####
##Social Comparison##
## Four social-comparison items: more.reduce, care.environment,
## consistent.recycle, active.community.
socom <- cbind(vmoo2$more.reduce, vmoo2$care.environment,
               vmoo2$consistent.recycle, vmoo2$active.community)
## Pairwise correlation between columns i and j of socom.
pair.cor.socom <- function(i, j) cor(socom[, i], socom[, j], use = "complete")
r12 <- pair.cor.socom(1, 2)
r13 <- pair.cor.socom(1, 3)
r14 <- pair.cor.socom(1, 4)
r23 <- pair.cor.socom(2, 3)
r24 <- pair.cor.socom(2, 4)
r34 <- pair.cor.socom(3, 4)
corr <- c(r12, r13, r14, r23, r24, r34)
alpha.ov.socom <- alpha(corr)
## alpha.k.socom: alpha() on the three pairs that do not involve item k.
alpha.1.socom <- alpha(corr[c(4:6)])
alpha.2.socom <- alpha(corr[c(2, 3, 6)])
alpha.3.socom <- alpha(corr[c(1, 3, 5)])
alpha.4.socom <- alpha(corr[c(1, 2, 4)])
## Total score appended as a fifth column, then each item against the sum
## of the other three.
t.ov.socom <- c(socom[, 1] + socom[, 2] + socom[, 3] + socom[, 4])
total.socom <- cbind(socom, t.ov.socom)
it1.socom <- cor(total.socom[, 1], c(socom[, 2] + socom[, 3] + socom[, 4]), use = "complete")
it2.socom <- cor(total.socom[, 2], c(socom[, 1] + socom[, 3] + socom[, 4]), use = "complete")
it3.socom <- cor(total.socom[, 3], c(socom[, 1] + socom[, 2] + socom[, 4]), use = "complete")
it4.socom <- cor(total.socom[, 4], c(socom[, 1] + socom[, 2] + socom[, 3]), use = "complete")
## PCA on the correlation matrix of all four items.
fit.socom <- princomp(socom, cor = TRUE)
SF.socom <- summary(fit.socom)
L.socom <- loadings(fit.socom)
## Proportion of variance per component and its cumulative sum.
vars.socom <- SF.socom$sdev^2
vars.socom <- vars.socom / sum(vars.socom)
cumu.socom <- cumsum(vars.socom)
####
##Equal Weights Study 2###
## Logistic regression of enroll on the single predictor eqsum2, with
## 10-fold cross-validation and the in-sample confusion table (observed
## enroll=="Yes" vs. predicted probability > 0.5).
glmeq.3<-glm(enroll~eqsum2,data=frame2,family=binomial(link="logit"))
glmcveq.3<-cv.glm(frame2,glmeq.3,K=10)
eqm.3<-table(ifelse(frame2$enroll=="Yes",1,0),ifelse(invlogit(predict(glmeq.3,frame2))>0.5,1,0))
## Off-diagonal cells of the table = number of misclassified cases.
miss.eq3<-eqm.3[2]+eqm.3[3]
## Bootstrap: refit on N resamples of frame2 (rows drawn with replacement)
## and count misclassifications of each refit on the full frame2.
q<-frame2
miss.eqq3<-c()
for(i in 1:N){
q1<-q[sample(nrow(q),replace=T),]
glmeq<-glm(enroll~eqsum2,data=q1,family=binomial(link="logit"))
miss.eqq3[i]<-sum(abs(ifelse(frame2$enroll=="Yes",1,0)-ifelse(invlogit(predict(glmeq,frame2))>0.5,1,0)))
}
## Marginal totals of the confusion table and the square root of their
## product.
eqm1<-eqm.3[1]+eqm.3[2]
eqm2<-eqm.3[3]+eqm.3[4]
eqm3<-eqm.3[1]+eqm.3[3]
eqm4<-eqm.3[2]+eqm.3[4]
## The counts are integers; multiplying four of them in integer arithmetic
## overflows to NA once the product passes .Machine$integer.max, so the
## product is carried out in double precision.
eqq.3<-sqrt(as.numeric(eqm1)*eqm2*eqm3*eqm4)
##
##TTB study 2##
## Single-predictor logistic model (enroll on ihd.enjoy) with 10-fold
## cross-validation and the in-sample confusion table.
ttb.2<-glm(enroll~ihd.enjoy,data=vmoo2,family=binomial(link="logit"))
ttb.2.cv<-cv.glm(vmoo2,ttb.2,K=10)
ttb.2t<-table(ifelse(vmoo2$enroll=="Yes",1,0),ifelse(invlogit(predict(ttb.2))>0.5,1,0))      
## Marginal totals of the confusion table and the square root of their
## product (computed in double precision: the integer product of four
## counts overflows to NA past .Machine$integer.max).
gmm21<-ttb.2t[1]+ttb.2t[2]
gmm22<-ttb.2t[3]+ttb.2t[4]
gmm23<-ttb.2t[1]+ttb.2t[3]
gmm24<-ttb.2t[2]+ttb.2t[4]
ttb.22<-sqrt(as.numeric(gmm21)*gmm22*gmm23*gmm24)
## Bootstrap: refit on N resamples of vmoo2 and count misclassifications of
## each refit on the full vmoo2.  The replicate fit gets its own name so
## that ttb.2 keeps holding the full-data model after the loop (previously
## the loop overwrote ttb.2 with the last replicate).
q<-vmoo2
ttbmiss.2<-c()
for(i in 1:N){
q1<-q[sample(nrow(q),replace=T),]
ttb.2.boot<-glm(enroll~ihd.enjoy,data=q1,family=binomial(link="logit"))
ttbmiss.2[i]<-sum(abs(ifelse(vmoo2$enroll=="Yes",1,0)-ifelse(invlogit(predict(ttb.2.boot,vmoo2))>0.5,1,0)))
}
####
###MLE Logistic Study 2###
## Bootstrap of the full logistic model: each of the N replicates fits the
## model on a resample (q1) of vmoo2 and counts misclassifications on the
## evaluation data (qr).
glmiss.2<-c()
for(i in 1:N){
qr<-vmoo2
q1<-qr[sample(nrow(qr),replace=TRUE),]
## A resample may miss some factor levels.  Keep only the levels present in
## q1, and blank out the rows of qr that carry any other level so that
## predict() never meets a level the fitted model has not seen.
## Fixed: `id` holds row positions of qr, but it was also used to index q1,
## which set unrelated resampled rows to NA and could strip a level from q1
## that was still present in qr.  Only qr is modified now.
q1$prize.drawing<-drop.levels(q1$prize.drawing)
id <- which(!(qr$prize.drawing %in% levels(q1$prize.drawing)))
qr$prize.drawing[id]<-NA
qr$prize.drawing<-drop.levels(qr$prize.drawing)

q1$protest.demonstration<-drop.levels(q1$protest.demonstration)
id <- which(!(qr$protest.demonstration %in% levels(q1$protest.demonstration)))
qr$protest.demonstration[id]<-NA
qr$protest.demonstration<-drop.levels(qr$protest.demonstration)

qr<-na.omit(qr)
glm.2<-glm(enroll~ihd.enjoy+ihd.save.money+ihd.save.electricity+ihd.learn+local.trust+scientists.trust+trust.utility+active.community+control.consume+use.plants.animals+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=q1,family=binomial(link="logit"))
c<-ifelse(qr$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm.2,qr))>0.5,1,0) 
glmiss.2[i]<-sum(abs(c-d))}

## Fixed: the refit below used whatever qr the last bootstrap replicate left
## behind (a random subset of vmoo2).  The evaluation data is now rebuilt
## deterministically, using the same preparation this file applies to the
## data passed to cv.glm for this model (row 197's prize.drawing set to NA,
## unused levels dropped, incomplete rows removed).
## NOTE(review): the hard-coded row 197 is copied from that cv.glm
## preparation - confirm it is the intended exclusion here as well.
qr<-vmoo2
qr$prize.drawing[197]<-NA
qr$protest.demonstration<-drop.levels(qr$protest.demonstration)
qr$prize.drawing<-drop.levels(qr$prize.drawing)
qr<-na.omit(qr)
glm.2<-glm(enroll~ihd.enjoy+ihd.save.money+ihd.save.electricity+ihd.learn+local.trust+scientists.trust+trust.utility+active.community+control.consume+use.plants.animals+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=qr,family=binomial(link="logit"))

## In-sample confusion table, its marginal totals, and the square root of
## their product (double precision, so the product of four integer counts
## cannot overflow to NA).
gm.2<-table(ifelse(qr$enroll=="Yes",1,0),ifelse(invlogit(predict(glm.2,qr))>0.5,1,0))
gmm1<-gm.2[1]+gm.2[2]
gmm2<-gm.2[3]+gm.2[4]
gmm3<-gm.2[1]+gm.2[3]
gmm4<-gm.2[2]+gm.2[4]
gmm.2<-sqrt(as.numeric(gmm1)*gmm2*gmm3*gmm4)

## Same model fitted on vmoo2 as it stands (no rows removed beforehand).
glm.2.ov<-glm(enroll~ihd.enjoy+ihd.save.money+ihd.save.electricity+ihd.learn+local.trust+scientists.trust+trust.utility+active.community+control.consume+use.plants.animals+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=vmoo2,family=binomial(link="logit"))

##MLE Logistic with Factors Study 2##
## vmoo2 plus the first-component scores of the IHD, NEP and trust PCAs.
frame2.fac<-cbind(vmoo2,SF.IHD2$scores[,1])
frame2.fac<-cbind(frame2.fac,SF.NEP2$scores[,1])
frame2.fac<-cbind(frame2.fac,SF.trust2$scores[,1])
## The three score columns are the ones appended after vmoo2's own columns;
## address them relative to ncol(vmoo2) rather than by fixed positions 94:96.
colnames(frame2.fac)[ncol(vmoo2)+1:3]<-c("IHD.fac","NEP.fac","Trust.fac")
glm2.fac<-glm(enroll~IHD.fac+NEP.fac+Trust.fac+active.community+control.consume+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=frame2.fac,family=binomial(link="logit"))

## 10-fold cross-validation of glm.2 (the item-level model fitted in the
## previous section) on complete cases with row 197's prize.drawing removed.
qr<-frame2.fac
qr$prize.drawing[197]<-NA
qr$protest.demonstration<-drop.levels(qr$protest.demonstration)
qr$prize.drawing<-drop.levels(qr$prize.drawing)
qr<-na.omit(qr)
glmcv.2<-cv.glm(qr,glm.2,K=10)

## Bootstrap of the factor-score model: fit on a resample (q1), count
## misclassifications on the evaluation data (qr).
glmiss2.fac<-c()
for(i in 1:N){
qr<-frame2.fac
q1<-qr[sample(nrow(qr),replace=TRUE),]
## Keep only the factor levels present in the resample and blank out the
## rows of qr carrying any other level.
## Fixed: `id` holds row positions of qr, but it was also used to index q1,
## which set unrelated resampled rows to NA and could strip a level from q1
## that was still present in qr.  Only qr is modified now.
q1$prize.drawing<-drop.levels(q1$prize.drawing)
id <- which(!(qr$prize.drawing %in% levels(q1$prize.drawing)))
qr$prize.drawing[id]<-NA
qr$prize.drawing<-drop.levels(qr$prize.drawing)

q1$protest.demonstration<-drop.levels(q1$protest.demonstration)
id <- which(!(qr$protest.demonstration %in% levels(q1$protest.demonstration)))
qr$protest.demonstration[id]<-NA
qr$protest.demonstration<-drop.levels(qr$protest.demonstration)
qr<-na.omit(qr)

## The replicate fit has its own name so it cannot be mistaken for the
## full-data model after the loop.
glm2.fac.boot<-glm(enroll~IHD.fac+NEP.fac+Trust.fac+active.community+control.consume+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=q1,family=binomial(link="logit"))
c<-ifelse(qr$enroll=="Yes",1,0)
d<-ifelse(invlogit(predict(glm2.fac.boot,qr))>0.5,1,0) 
glmiss2.fac[i]<-sum(abs(c-d))}

qr<-frame2.fac
qr$prize.drawing[197]<-NA
qr$protest.demonstration<-drop.levels(qr$protest.demonstration)
qr$prize.drawing<-drop.levels(qr$prize.drawing)
qr<-na.omit(qr)
## Fixed: cv.glm and the confusion table below used the model left over from
## the last bootstrap replicate.  The model is now refitted on the prepared
## data first, the same way the item-level section refits glm.2 after its
## bootstrap loop.
glm2.fac<-glm(enroll~IHD.fac+NEP.fac+Trust.fac+active.community+control.consume+new.words+income+ten.am.two.pm+two.pm.six.pm+ten.pm.two.am+two.am.six.am+tracking.device+prize.drawing+protest.demonstration,data=qr,family=binomial(link="logit"))
glmcv2.fac<-cv.glm(qr,glm2.fac,K=10)

## In-sample confusion table, its marginal totals, and the square root of
## their product (double precision, so the product of four integer counts
## cannot overflow to NA).
gm2.fac<-table(ifelse(qr$enroll=="Yes",1,0),ifelse(invlogit(predict(glm2.fac,qr))>0.5,1,0))
gmm1<-gm2.fac[1]+gm2.fac[2]
gmm2<-gm2.fac[3]+gm2.fac[4]
gmm3<-gm2.fac[1]+gm2.fac[3]
gmm4<-gm2.fac[2]+gm2.fac[4]
gmm2.fac<-sqrt(as.numeric(gmm1)*gmm2*gmm3*gmm4)

####
##Total Hours##
## 0/1 indicators (1 when the answer equals "yes") for each of the six
## four-hour time slots, appended to vmoo2 in the order listed here.
for (slot in c("ten.am.two.pm", "two.pm.six.pm", "six.pm.ten.pm",
               "ten.pm.two.am", "two.am.six.am", "six.am.ten.am")) {
  vmoo2[[paste(slot, "bin", sep = ".")]] <- ifelse(vmoo2[[slot]] == "yes", 1, 0)
}
## Number of slots marked "yes": all six, the two slots covering 6am-2pm,
## and the two slots covering 6pm-2am.
vmoo2$total.hours <- with(vmoo2,
  ten.am.two.pm.bin + two.pm.six.pm.bin + six.pm.ten.pm.bin +
    ten.pm.two.am.bin + two.am.six.am.bin + six.am.ten.am.bin)
vmoo2$morning.hours <- with(vmoo2, ten.am.two.pm.bin + six.am.ten.am.bin)
vmoo2$evening.hours <- with(vmoo2, six.pm.ten.pm.bin + ten.pm.two.am.bin)

##Recursive Partitioning Study 2##
## From here on vmoo2 contains complete cases only.
vmoo2<-na.omit(vmoo2)
## Classification tree on every column except 1, 8 and 93, plus its
## in-sample confusion table (observed enroll=="Yes" vs. P(class 2) > 0.5).
rp2<-rpart(enroll~.,data=vmoo2[,-c(1,8,93)])
rp2t<-table(ifelse(vmoo2$enroll=="Yes",1,0),ifelse(predict(rp2,vmoo2)[,2]>0.5,1,0))
rp2t1<-rp2t[1]+rp2t[2]
rp2t2<-rp2t[3]+rp2t[4]
rp2t3<-rp2t[1]+rp2t[3]
rp2t4<-rp2t[2]+rp2t[4]
## Product taken in double precision: four integer counts multiplied in
## integer arithmetic overflow to NA past .Machine$integer.max.
rp2tt<-sqrt(as.numeric(rp2t1)*rp2t2*rp2t3*rp2t4)

## Bootstrap: grow a tree on each of N resamples and count its
## misclassifications on the full vmoo2.  Replicate trees get their own name
## so that rp2 keeps holding the full-data tree.
qz<-vmoo2
rp2miss<-c()
for(i in 1:N){
q1<-qz[sample(nrow(qz),replace=TRUE),]
rp2.boot<-rpart(enroll~.,data=q1[,-c(1,8,93)])
rp2miss[i]<-sum(abs(ifelse(vmoo2$enroll=="Yes",1,0)-ifelse(predict(rp2.boot,vmoo2)[,2]>0.5,1,0)))}

## Same tree with columns 62:65 additionally excluded.
rp2.noihd<-rpart(enroll~.,data=vmoo2[,-c(1,8,62:65,93)])
## In-sample misclassification rate; note that rp2miss.n is re-used for the
## bootstrap counts a few lines below.
rp2miss.n<-sum(abs(ifelse(vmoo2$enroll=="Yes",1,0)-ifelse(predict(rp2.noihd,vmoo2)[,2]>0.5,1,0)))/length(vmoo2$enroll)
## Fixed: this table was built from rp2 (by then the last bootstrap tree of
## the model above) instead of rp2.noihd.
rp2t.n<-table(ifelse(vmoo2$enroll=="Yes",1,0),ifelse(predict(rp2.noihd,vmoo2)[,2]>0.5,1,0))
rp2t1.n<-rp2t.n[1]+rp2t.n[2]
rp2t2.n<-rp2t.n[3]+rp2t.n[4]
rp2t3.n<-rp2t.n[1]+rp2t.n[3]
rp2t4.n<-rp2t.n[2]+rp2t.n[4]
rp2tt.n<-sqrt(as.numeric(rp2t1.n)*rp2t2.n*rp2t3.n*rp2t4.n)

qz<-vmoo2
rp2miss.n<-c()
for(i in 1:N){
q1<-qz[sample(nrow(qz),replace=TRUE),]
rp2.noihd.boot<-rpart(enroll~.,data=q1[,-c(1,8,62:65,93)])
rp2miss.n[i]<-sum(abs(ifelse(vmoo2$enroll=="Yes",1,0)-ifelse(predict(rp2.noihd.boot,vmoo2)[,2]>0.5,1,0)))}

## 10-fold cross-validation of the tree: every row gets a random fold label
## in 1..10, and each fold is predicted by a tree grown on the other folds.
fold.cart2.miss<-c()
q1<-na.omit(vmoo2)
size<-length(q1[,1])
fold.d<-c()
for(i in 1:size){
fold.d[i]<-sample(c(1:10),1,replace=FALSE)
}
q1<-cbind(q1,fold.d)
for(j in 1:max(fold.d)){
held.out<-q1$fold.d==j
## Fixed: `q1[-c(q1$fold.d==j),]` negated a logical vector (-TRUE is -1,
## -FALSE is 0) and therefore dropped at most row 1, so the held-out fold
## stayed in the training data.  The training rows are now selected with a
## logical mask, and the fold label (last column of q1) is excluded from the
## predictors.
rp2.cv<-rpart(enroll~.,data=q1[!held.out,-c(1,8,93,ncol(q1))])
a<-ifelse(q1$enroll[held.out]=="Yes",1,0)
b<-ifelse(predict(rp2.cv,q1[held.out,])[,2]>0.5,1,0)
fold.cart2.miss[j]<-sum(abs(a-b))/sum(held.out)}

## Same cross-validation for the tree without columns 62:65.
fold.cart2.miss.n<-c()
q1<-na.omit(vmoo2)
size<-length(q1[,1])
fold.d<-c()
for(i in 1:size){
fold.d[i]<-sample(c(1:10),1,replace=FALSE)
}
q1<-cbind(q1,fold.d)
for(j in 1:max(fold.d)){
held.out<-q1$fold.d==j
rp2.cv<-rpart(enroll~.,data=q1[!held.out,-c(1,8,62:65,93,ncol(q1))])
a<-ifelse(q1$enroll[held.out]=="Yes",1,0)
b<-ifelse(predict(rp2.cv,q1[held.out,])[,2]>0.5,1,0)
fold.cart2.miss.n[j]<-sum(abs(a-b))/sum(held.out)}
####
@ 

Intentions to enroll in the trial were unassociated with demographics.  Table~\ref{tab:time2} shows the performance of different measures of constraints or barriers to participation.  Participants who reported being at home were more likely to state that they were willing to enroll in the trial than those who were not around.  This was confirmed by a strong correlation between total or morning hours spent in the home and intentions to enroll.  Measures of trust in the utility and scientists were also positively associated with willingness to enroll in the trial.  Scientists were the most trusted of all the `institutions', higher than local government ($t=$ \Sexpr{prettyNum(t.test(vmoo2$scientists.trust,vmoo2$local.trust)$statistic)}, $p=$ \Sexpr{ztrunc(as.numeric(t.test(vmoo2$scientists.trust,vmoo2$local.trust)[3]))}, $d=$ \Sexpr{prettyNum(t.test(vmoo2$scientists.trust,vmoo2$local.trust)$statistic/sqrt(length(vmoo2$scientists.trust)))}), then federal government, then utility last.  They were also trusted more than community and co-workers but less than family and friends.  In terms of self-efficacy, participants who reported that they would not bother trying to figure out complicated tasks stated that they were less willing to enroll in the trial, indicating that feelings of difficulty and inability to use the technology were an important element of the decision to enroll.

\begin{table}[hp]
  \caption{Univariate predictions for items and scales related to constraints on study participation.  $\tau$ is Kendall's measure of rank correlation~\cite{kendall1938new}.}
  \label{tab:time2}
  \centering
  \scalebox{0.87}{
    \begin{tabular}{p{10cm} c c c}
      Item                   &  $\chi^2$ ($p$)                 & Mean & SD \\ \hline
      \emph{Time at Home} &  &  &  \\
      6am-10am & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$six.am.ten.am,vmoo2$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$six.am.ten.am,vmoo2$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo2$six.am.ten.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$six.am.ten.am)-1,na.rm=TRUE)))}  \\ 
10am-2pm &   $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$ten.am.two.pm,vmoo2$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$ten.am.two.pm,vmoo2$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo2$ten.am.two.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$ten.am.two.pm)-1,na.rm=TRUE)))} \\ 
2pm-6pm & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$two.pm.six.pm,vmoo2$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$two.pm.six.pm,vmoo2$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo2$two.pm.six.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$two.pm.six.pm)-1,na.rm=TRUE)))} \\ 
6pm-10pm & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$six.pm.ten.pm,vmoo2$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$six.pm.ten.pm,vmoo2$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo2$six.pm.ten.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$six.pm.ten.pm)-1,na.rm=TRUE)))} \\ 
10pm-2am & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$ten.pm.two.am,vmoo2$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$ten.pm.two.am,vmoo2$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo2$ten.pm.two.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$ten.pm.two.am)-1,na.rm=TRUE)))} \\ 
2am-6am &    \Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$two.am.six.am,vmoo2$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$two.am.six.am,vmoo2$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo2$two.am.six.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$two.am.six.am)-1,na.rm=TRUE)))} \\
& & & \\
Item                   &  $\tau$ ($Z$)                 & Mean & SD \\ \hline
\emph{Aggregate Time Periods} &  &  &  \\
Total Hours & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$total.hours,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$total.hours,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(mean(vmoo2$total.hours*4,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo2$total.hours*4,na.rm=TRUE))} \\ 
Morning Hours (6am-2pm) & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$morning.hours,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$morning.hours,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(mean(vmoo2$morning.hours*4,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo2$morning.hours*4,na.rm=TRUE))} \\ 
Evening Hours (6pm-2am) & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$evening.hours,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$evening.hours,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(mean(vmoo2$evening.hours*4,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo2$evening.hours*4,na.rm=TRUE))} \\ 
& & & \\
Item                   &  $\tau$ ($Z$)                 & Loading & $\alpha$ \\ \hline
\emph{Trust} &  &  &  \\
Your local government. & \Sexpr{ztrunc(cor.test(vmoo2$local.trust,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$local.trust,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.trust2[1,1])} &  \\ 
Scientists. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$scientists.trust,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$scientists.trust,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.trust2[2,1])} &  \\ 
Your utility company. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$trust.utility,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$trust.utility,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.trust2[3,1])}  &  \\ 
Your co-workers. & \Sexpr{ztrunc(cor.test(vmoo2$co.workers.trust,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$co.workers.trust,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.trust2[4,1])}  &  \\ 
Trust Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(-SF.trust2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(-SF.trust2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.trust2[1])}\% & \Sexpr{ztrunc(alpha.ov.trust2.red)} \\
& & & \\
\emph{Self-Efficacy} &  &  &  \\
If something looks too complicated I will not even bother to try it. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$too.complicated,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$too.complicated,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(L.eff[1,1])} &  \\ 
I do not seem capable of dealing with most problems that come up in my life. & \Sexpr{ztrunc(cor.test(vmoo2$not.capable,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(abs(cor.test(vmoo2$not.capable,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(L.eff[2,1])} &  \\ 
When unexpected problems occur I don't handle them very well. & \Sexpr{ztrunc(cor.test(vmoo2$unexpected.problems,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$unexpected.problems,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(L.eff[3,1])} &  \\ 
When I make plans, I am certain I can make them work. & \Sexpr{ztrunc(cor.test(vmoo2$make.plans.work,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$make.plans.work,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(L.eff[4,1])} &  \\ 
Self-Efficacy Factor & \Sexpr{ztrunc(abs(cor.test(SF.eff$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{ztrunc(abs(cor.test(SF.eff$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.eff[1])}\% & \Sexpr{ztrunc(alpha.ov.eff.red)} \\ \hline
\end{tabular}}
\end{table}

Table~\ref{tab:twopsychob} shows the motivation items, including socializing, frugality, and exploration scales.  None of the frugality items were associated with intentions to enroll, indicating that individual concerns about saving money may not play much of a role in the decision to volunteer in energy efficiency programs.  None of the socializing items were statistically significant either.  On the other hand, those who like to work at problems and explore new subjects were more willing to enroll.

 \begin{table}[hp]
  \caption{Univariate predictions for scales related to motivations to enroll.  $\tau$ is Kendall's measure of rank correlation.}
  \label{tab:twopsychob}
  \centering
\scalebox{0.85}{\begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Loading & $\alpha$ \\ \hline
\emph{Socializing} &  &  &  \\
How many close friends do you have? (meaning people that you feel at ease with, can talk to about private matters, and can call on for help) & \Sexpr{ztrunc(cor.test(vmoo2$close.friends,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$close.friends,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.soc[1,1])} &  \\ 
How many of these friends do you see or talk to at least once every 2 weeks? & \Sexpr{ztrunc(cor.test(vmoo2$talk.two.weeks,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$talk.two.weeks,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.soc[2,1])} &  \\ 
How many times have you attended a party or other social gathering in the past 2 months? & \Sexpr{ztrunc(cor.test(vmoo2$party.gathering,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(abs(cor.test(vmoo2$party.gathering,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.soc[3,1])} &  \\ 
Socializing Factor & \Sexpr{ztrunc(abs(cor.test(SF.soc$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{ztrunc(abs(cor.test(SF.soc$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.soc[1])}\% & \Sexpr{ztrunc(alpha.ov.soc)} \\ 
& & & \\
\emph{Frugality} &  &  &  \\
If you take good care of your possessions, you will definitely save money in the long run. & \Sexpr{ztrunc(cor.test(vmoo2$care.possessions,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$care.possessions,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.fr[1,1])} &  \\ 
If you can re-use an item you already have, there's no sense in buying something new. & \Sexpr{ztrunc(cor.test(vmoo2$reuse.items,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$reuse.items,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.fr[2,1])} &  \\ 
There are things I resist buying today so I can save for tomorrow. & \Sexpr{ztrunc(cor.test(vmoo2$resist.buying,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(abs(cor.test(vmoo2$resist.buying,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.fr[3,1])} &  \\ 
I am willing to wait on a purchase so that I can save money. & \Sexpr{ztrunc(cor.test(vmoo2$wait.purchase,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$wait.purchase,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.fr[4,1])} &  \\ 
Frugality Factor & \Sexpr{ztrunc(abs(cor.test(SF.fr$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{prettyNum(abs(cor.test(SF.fr$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.fr[1])}\% & \Sexpr{ztrunc(alpha.ov.fr.red)} \\ 
& & & \\
\emph{Exploration} &  &  &  \\
I like to try to solve problems that present a mental challenge. & \Sexpr{ztrunc(cor.test(vmoo2$enjoy.mental.challenge,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$enjoy.mental.challenge,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(L.ex[1,1])} &  \\ 
I like to work at a problem until I get it right. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$like.work.problem,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$like.work.problem,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(L.ex[2,1])} &  \\ 
I am always eager to know more about the universe we live in. & \Sexpr{ztrunc(cor.test(vmoo2$eager.to.know,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$eager.to.know,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(L.ex[3,1])} &  \\ 
When I hear about a new subject I like to find out more about it. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$new.subject,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$new.subject,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(L.ex[4,1])} &  \\ 
Exploration Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(SF.ex$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(SF.ex$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.ex[1])}\% & \Sexpr{ztrunc(alpha.ov.ex.red)} \\ \hline
\end{tabular}}
\end{table}
 %$

 Table~\ref{tab:twopsychoc} shows the three topical scales.  As can be seen, the single strongest predictor of intentions to enroll in the trial was expectations of benefit and topical interest in the in-home display.  The belief that plants and animals exist to be used by humans was associated with unwillingness to enroll.  Interest in eco-friendly technology was not associated with intentions to enroll.
 
 %In terms of topics, the program could conceivably be construed as being about the environment, the in-home display, or about new technology in general.  Interest in the first topic was measured using the New Ecological Paradigm (NEP) \cite{dunlap2000new}, a widely used measure of environmental attitudes.  Interest in the second topic was measured using a four-item scale of their attitudes and expectations of the in-home display, such as whether participants expected it to help them save money.  General interest in new technology was assessed using a scale that measured affinity for technology.

%Lastly, two questions assessed other volunteering behaviors, asking whether participants had previously volunteered in an energy efficiency program, and whether they would be willing to complete additional questions after the survey.  Several similar questions asked whether participants engaged in civic behaviors, for example using a public library.
%, weakly associated with the NEP, and strongly associated with expectations of the in-home display.  

 
\begin{table}[hp]
  \caption{Univariate predictions for topical interest scales.  $\tau$ is Kendall's measure of rank correlation.}
  \label{tab:twopsychoc}
  \centering
\scalebox{0.85}{\begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Loading & $\alpha$ \\ \hline
\emph{NEP} &  &  &  \\
Plants and animals exist primarily to be used by humans. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$use.plants.animals,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$use.plants.animals,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(-L.NEP2[1,1])} &  \\ 
The balance of nature is very delicate and easily upset. & \Sexpr{ztrunc(cor.test(vmoo2$delicate.nature,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$delicate.nature,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[2,1])} &  \\ 
There are limits to growth beyond which our industrialized society cannot expand. & \Sexpr{ztrunc(cor.test(vmoo2$growth.limits,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$growth.limits,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[3,1])} &  \\ 
The earth is like a spaceship with only limited room and resources. & \Sexpr{ztrunc(cor.test(vmoo2$spaceship.earth,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$spaceship.earth,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[4,1])} &  \\ 
NEP Factor & \Sexpr{ztrunc(abs(cor.test(-SF.NEP2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{ztrunc(abs(cor.test(-SF.NEP2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.NEP2[1])}\% & \Sexpr{ztrunc(alpha.ov.NEP2)}  \\ 
& & & \\
\emph{IHD Expectations} &  &  &  \\
I would enjoy having an in-home display in my home. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.enjoy,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.enjoy,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[1,1])} &  \\ 
An in-home display would help me save electricity each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.electricity,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.electricity,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[2,1])} &  \\ 
An in-home display would help me save money each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.money,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.money,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[3,1])} &  \\ 
I would learn from an in-home display. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.learn,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.learn,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[4,1])} &  \\ 
IHD Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(-SF.IHD2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(-SF.IHD2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.IHD2[1])}\% & \Sexpr{ztrunc(alpha.ov.IHD2)} \\ 
& & & \\
\emph{Eco-Friendly Technology} &  &  &  \\
I understand the potential damage to the environment that some products can cause. & \Sexpr{ztrunc(cor.test(vmoo2$eco.purchase,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(abs(cor.test(vmoo2$eco.purchase,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.buy[1,1])} &  \\ 
I have switched products for ecological reasons. & \Sexpr{ztrunc(cor.test(vmoo2$switch.products,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$switch.products,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.buy[2,1])}  &  \\ 
I have purchased a household appliance because it uses less electricity than other brands. & \Sexpr{ztrunc(cor.test(vmoo2$eco.appliance.buy,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$eco.appliance.buy,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.buy[3,1])}  &  \\ 
When I have a choice between two equal products, I always buy the one that is less harmful to other people and the environment. & \Sexpr{ztrunc(cor.test(vmoo2$eco.equal.buy,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo2$eco.equal.buy,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.buy[4,1])}  &  \\ 
Purchases Factor & \Sexpr{ztrunc(abs(cor.test(SF.buy$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{prettyNum(abs(cor.test(SF.buy$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.buy[1])}\% & \Sexpr{ztrunc(alpha.ov.buy.red)} \\ \hline
\end{tabular}}
\end{table}

%Hey, are the people who are motivated by IHD expectations different than those motivated by NEP? Is there any kind of clustering we can do? That’s a cool result if there is.  Covariance matrix between factors$

 \subsection{Multivariate Prediction of Enrollment Intentions}
 \subsubsection{Logistic Regression}
 % Reported p-values are two-sided tail probabilities of the t statistic, 2*pt(-abs(t), df); the density dt() is not a p-value.
 In multiple regression, the only statistically significant predictors were whether they expected to enjoy the in-home display, $t(\Sexpr{glm.2.ov$df.residual})=$ \Sexpr{prettyNum(glm.2.ov$coefficients[2]/sqrt(diag(vcov(glm.2.ov))[2]))}, $p=$ \Sexpr{ztrunc(2*pt(-abs(glm.2.ov$coefficients[2]/sqrt(diag(vcov(glm.2.ov))[2])),glm.2.ov$df.residual))}, whether they expected to learn from the in-home display, $t(\Sexpr{glm.2.ov$df.residual})=$ \Sexpr{prettyNum(glm.2.ov$coefficients[5]/sqrt(diag(vcov(glm.2.ov))[5]))}, $p=$ \Sexpr{ztrunc(2*pt(-abs(glm.2.ov$coefficients[5]/sqrt(diag(vcov(glm.2.ov))[5])),glm.2.ov$df.residual))}, whether they trust the utility, $t(\Sexpr{glm.2.ov$df.residual})=$ \Sexpr{prettyNum(glm.2.ov$coefficients[8]/sqrt(diag(vcov(glm.2.ov))[8]))}, $p=$ \Sexpr{ztrunc(2*pt(-abs(glm.2.ov$coefficients[8]/sqrt(diag(vcov(glm.2.ov))[8])),glm.2.ov$df.residual))}, and whether they are inactive in the community, $t(\Sexpr{glm.2.ov$df.residual})=$ \Sexpr{prettyNum(glm.2.ov$coefficients[9]/sqrt(diag(vcov(glm.2.ov))[9]))}, $p=$ \Sexpr{ztrunc(2*pt(-abs(glm.2.ov$coefficients[9]/sqrt(diag(vcov(glm.2.ov))[9])),glm.2.ov$df.residual))}.  Using factor scores rather than individual items, only the in-home display expectations factor was a significant predictor, $t(\Sexpr{glm2.fac$df.residual})=$ \Sexpr{prettyNum(glm2.fac$coefficients[2]/sqrt(diag(vcov(glm2.fac))[2]))}, $p=$ \Sexpr{ztrunc(2*pt(-abs(glm2.fac$coefficients[2]/sqrt(diag(vcov(glm2.fac))[2])),glm2.fac$df.residual))}.

 \subsubsection{Classification Trees}

 Figure~\ref{fig:treeplot2} shows the Classification Tree, including in-home display expectations.\footnote{A second Classification Tree was also used, excluding the in-home display expectations.  It was more complex, including 9 items, and more difficult to understand.  We omit it here.}  Almost all (189/204) participants who expected to enjoy the IHD ($\ge3.5$ on a 5 point scale) said they would volunteer.  Among those who did not expect to enjoy it ($<3.5$ on a 5 point scale), about half (37/70) said they would not want to enroll.  However, among those people, most (27/39) who expected to be in the home in the morning (six am to ten am) would be willing to participate.  If they did not expect to be home, most (21/31) would be unwilling to participate, unless they trusted scientists ($\ge2.5$ on a 5 point scale).  Participants who did not expect to enjoy the IHD, did not expect to be home in the morning, and did not trust scientists, were very unlikely to enroll (17/20).  A surprising branch emerged for those who did expect to be home in the morning.  These participants were likely to want the in-home display if they did not trust their friends a lot ($<3.5$ on a 4 point scale).  If they did trust their friends a lot, then they were willing to volunteer if they felt unable to handle unexpected problems ($\ge2.5$ on a 5 point scale).  This pattern may indicate that those who do not trust their friends a lot and are home during the day feel that they are self-reliant and can thus manage their energy consumption while they are in the home.  On the other hand, those who trust their friends a lot but feel less competent may rely on others to manage their energy consumption.\footnote{Although this branch is surprising, it should be appreciated, and not discarded as noise or error \cite{davis2012surprises}.  
The value of Classification Trees is that they can discover unintuitive patterns in the data automatically depending on predictive value rather than whether the patterns fit with what we believe.  If models only included patterns of predictors that ``made sense'' to us intuitively, then our ability to predict volunteering would be severely limited, and biased, by our prior beliefs.  Classification Trees are thus valuable as they can discover unintuitive patterns.}  
 
 %Make sure this is correct
 
<<treeplot2,echo=false,results=hide,fig=false>>=
# Fit the enrollment classification tree (all predictors except columns 1, 8
# and 93 of vmoo2) and render it to treeplot2.png for the figure in the text.

# Label override handed to prp(): the labels prp generates are ignored and a
# fixed vector is returned instead.  Entries named "xN" are placeholders.
# NOTE(review): presumably prp picks entries by position, so this vector has
# to be re-checked against rp2 whenever the data or excluded columns change.
split.fun <- function(x, labs, digits, varlen, faclen)
{
  c("x1", "x2", "x3", "x4",
    "Trust scientists \n moderately or a lot?",
    "Home between \n six am and ten am?",
    "x7", "x8",
    "Unable to handle unexpected \n problems > somewhat?",
    "Do not trust friends a lot?",
    "Expect enjoyment?")
}

vmoo2$enroll <- factor(vmoo2$enroll, labels = c("Refuse", "Enroll"))

# Fit before opening the graphics device so that a failed fit cannot leave a
# half-initialised PNG device behind.
rp2 <- rpart(enroll ~ ., data = vmoo2[, -c(1, 8, 93)])

png(file = "treeplot2.png", width = 5000, height = 6500, res = 500)
# finally= guarantees the device is closed even if plotting stops with an error.
tryCatch({
  par(xpd = NA)  # set after png(): par() applies to the currently open device
  prp(rp2, split.fun = split.fun, extra = 102, under = TRUE, yesno = TRUE,
      clip.right.labs = FALSE, main = "", type = 0, varlen = 0, faclen = 0,
      fallen.leaves = TRUE, branch = 1, branch.lty = 1, gap = 15,
      border.col = 0, split.border.col = 0, xflip = FALSE,
      under.cex = 1.3, split.cex = 1.2, left = FALSE, branch.tweak = 1,
      yshift = 3, split.yshift = 2, nspace = 1, yspace = 0, space = 0,
      split.space = 0, split.yspace = 0)
}, finally = dev.off())

# Original author notes on the split variables.  The numbers below do not
# match the positions used in the label vector above -- TODO confirm which
# numbering is current.
#x4: trust scientists>2.5
#x3: home at six am-10am
#x2: enjoy ihd>3.5
#x8: unexpected problems>=2.5
#x7: friends.trust <3.5
@ 

\begin{figure}[p]
\vspace{-7cm}
  \noindent\makebox[\textwidth]{%
    \scalebox{0.7}{\includegraphics{treeplot2}} 
  }
    \caption{Classification Tree predicting enrollment intentions.}
    \label{fig:treeplot2}
\end{figure}

%Figure~\ref{fig:treeplot3} does not include the in-home display expectations item, and shows a much more complicated classification tree.  The driving force is whether the person expects to be in the home during the day (total hours $\ge 16$).  Most of those who expected to be in the home for most of the day were willing to participate, unless they had a high monthly bill ($>\$250$), liked problem solving, were social but did not contribute to a retirement fund and had greater than a two year degree.  Looking at the left branch of the tree, among those who did not expect to be in the home more than 16 hours a day, but did trust scientists, most were willing to paricipate (39/45).  If they also did not trust scientists but talked to their friends frequently, most were willing to participate (17/21).  Only among those who were not in the home often, did not trust scientists, did not see friends often, but were employed full time was there strong unwillingness to volunteer for the trial (14/18 refused). 

<<treeplot3,echo=false,results=hide,fig=false>>=
# Fit the enrollment classification tree without the in-home display items
# (columns 62:65 are dropped in addition to 1, 8 and 93) and render it to
# treeplot3.png.

# Label override handed to prp(): the labels prp generates are ignored and a
# fixed vector is returned instead.  Entries named "xN" are placeholders.
# NOTE(review): presumably prp picks entries by position, so this vector has
# to be re-checked against rp2.noihd whenever the data or excluded columns change.
split.fun <- function(x, labs, digits, varlen, faclen)
{
  c("x1", "x2", "x3", "x4", "x5",
    "Not employed \n full time?",
    "Do they see \n 4 friends often?",
    "Trust scientists \n moderately or a lot?",
    ">16 hours \n at home per day?",
    "x10",
    "Monthly bill \n > $250?",
    "x12",
    "Like problem-solving \n > somewhat?",
    "x14", "x15", "x16",
    "Less than \n two year degree?",
    "Contributed to a \n retirement (e.g., 401k)?",
    "Socialize in the past \n 2 months < 4 times?")
}

# Author notes: which variable sits behind each labelled position.
#x9 total hours
#left branch descending
#x8 trust scientists
#x7 talk two weeks
#x6 employment
#right branch descending
#x11 average bill
#x13 problem solving
#x19 party gatherings
#x18 401k
#x17 education

vmoo2$enroll <- factor(vmoo2$enroll, labels = c("Refuse", "Enroll"))

# Fit before opening the graphics device so that a failed fit cannot leave a
# half-initialised PNG device behind.
rp2.noihd <- rpart(enroll ~ ., data = vmoo2[, -c(1, 8, 62:65, 93)])

png(file = "treeplot3.png", width = 5000, height = 6500, res = 500)
# finally= guarantees the device is closed even if plotting stops with an error.
tryCatch({
  par(xpd = NA)  # set after png(): par() applies to the currently open device
  prp(rp2.noihd, split.fun = split.fun, extra = 102, under = TRUE, yesno = TRUE,
      clip.right.labs = FALSE, main = "", type = 0, varlen = 0, faclen = 0,
      fallen.leaves = TRUE, branch = 1, branch.lty = 1, gap = 15,
      border.col = 0, split.border.col = 0, xflip = FALSE,
      under.cex = 1.3, split.cex = 1.2, left = FALSE, branch.tweak = 1,
      yshift = 3, split.yshift = 2, nspace = 1, yspace = 0, space = 0,
      split.space = 0, split.yspace = 0)
}, finally = dev.off())
@ 

%\clearpage
%\begin{figure}[p]
%\vspace{-6cm}
%  \noindent\makebox[\textwidth]{%
%    \scalebox{1.8}{\includegraphics{treeplot3}} 
%  }
%    \caption{Classification Tree without in-home display items predicting enrollment intentions.}
%    \label{fig:treeplot3}
%  \end{figure}

\subsubsection{Equal Weights}
The range of scores was \Sexpr{prettyNum(min(frame2$eqsum))} to \Sexpr{prettyNum(max(frame2$eqsum))}, with a mean of \Sexpr{prettyNum(mean(frame2$eqsum))} and standard deviation \Sexpr{prettyNum(sd(frame2$eqsum))}.  The correlation between the equal weighted sum of predictor variables and intentions to enroll was $r=$ \Sexpr{ztrunc(cor(frame2$eqsum,as.numeric(frame2$enroll),use="complete"))}.

\subsubsection{Comparison of Methods}
Table~\ref{tab:comptwo} shows the performance of the different classification methods.\footnote{$\phi$ is the correlation between the method's prediction and intentions to enroll. \emph{TPR} is the proportion of people who intend to enroll correctly identified.  \emph{TNR} is the proportion of people who do not intend to enroll correctly identified.  \emph{Error} is the proportion of participants incorrectly classified.  \emph{10-Fold CV} is the estimated generalization error rate using 10-fold cross validation.  \emph{Boot} is the bootstrap estimate of generalization error.}  As can be seen, the Classification Tree performed the best on almost every measure, while the TTB heuristic also performed very well on 10-Fold CV.  

\begin{table}[h]
    \caption{Performance of Classification Methods.}
    \label{tab:comptwo}
  \centering
 \begin{tabular}{c c c c c c c}
 & \multicolumn{4}{c}{In-Sample Error} & \multicolumn{2}{c}{Generalization Error} \\
 Method & $\phi$ & TPR & TNR & Error & 10-Fold CV & Boot \\ \hline
Take the Best & \Sexpr{ztrunc((ttb.2t[1]*ttb.2t[4]-ttb.2t[3]*ttb.2t[2])/ttb.22)} & $\mathbf{\Sexpr{ztrunc(ttb.2t[4]/(ttb.2t[4]+ttb.2t[2]))}}$ & \Sexpr{ztrunc(ttb.2t[1]/(ttb.2t[1]+ttb.2t[3]))} & \Sexpr{ztrunc((ttb.2t[2]+ttb.2t[3])/length(vmoo2$enroll))} & \Sexpr{ztrunc(ttb.2.cv$delta)} & \Sexpr{ztrunc(mean(ttbmiss.2)/length(vmoo2$enroll))} \\ 
Equal Weights & \Sexpr{ztrunc((eqm.3[1]*eqm.3[4]-eqm.3[3]*eqm.3[2])/eqq.3)} & \Sexpr{ztrunc(eqm.3[4]/(eqm.3[4]+eqm.3[2]))} & \Sexpr{ztrunc(eqm.3[1]/(eqm.3[1]+eqm.3[3]))} &  \Sexpr{ztrunc((eqm.3[2]+eqm.3[3])/length(frame2$enroll))} & \Sexpr{ztrunc(glmcveq.3$delta)} & \Sexpr{ztrunc(mean(miss.eqq3)/length(frame2$enroll))} \\
Logistic & \Sexpr{ztrunc((gm.2[1]*gm.2[4]-gm.2[3]*gm.2[2])/gmm.2)} & $\mathbf{\Sexpr{ztrunc(gm.2[4]/(gm.2[4]+gm.2[2]))}}$ & \Sexpr{ztrunc(gm.2[1]/(gm.2[1]+gm.2[3]))} &  \Sexpr{ztrunc((gm.2[2]+gm.2[3])/length(qr$enroll))}  & \Sexpr{ztrunc(glmcv.2$delta)} & \Sexpr{ztrunc(mean(glmiss.2)/length(qr$enroll))} \\ 
Class Tree & $\mathbf{\Sexpr{ztrunc((rp2t[1]*rp2t[4]-rp2t[3]*rp2t[2])/rp2tt)}}$ & $\mathbf{\Sexpr{ztrunc(rp2t[4]/(rp2t[4]+rp2t[2]))}}$ & $\mathbf{\Sexpr{ztrunc(rp2t[1]/(rp2t[1]+rp2t[3]))}}$ & $\mathbf{\Sexpr{ztrunc((rp2t[2]+rp2t[3])/length(vmoo2$enroll))}}$ & $\mathbf{\Sexpr{ztrunc(mean(fold.cart2.miss))}}$ & $\mathbf{\Sexpr{ztrunc(mean(rp2miss)/length(vmoo2$enroll))}}$ \\ \hline
%Class Tree (No-IHD) & \Sexpr{ztrunc((rp2t.n[1]*rp2t.n[4]-rp2t.n[3]*rp2t.n[2])/rp2tt.n)} & \Sexpr{ztrunc(rp2t.n[4]/(rp2t.n[4]+rp2t.n[2]))} & $\mathbf{\Sexpr{ztrunc(rp2t.n[1]/(rp2t.n[1]+rp2t.n[3]))}}$ &  \Sexpr{ztrunc((rp2t.n[2]+rp2t.n[3])/length(vmoo2$enroll))} & \Sexpr{ztrunc(mean(fold.cart2.miss.n))} & \Sexpr{ztrunc(mean(rp2miss.n)/length(vmoo2$enroll))} \\ \hline
%Logistic (F)  & \Sexpr{ztrunc((gm2.fac[1]*gm2.fac[4]-gm.fac[3]*gm2.fac[2])/gmm2.fac)} & \Sexpr{ztrunc(gm2.fac[4]/(gm2.fac[4]+gm2.fac[2]))} & \Sexpr{ztrunc(gm2.fac[1]/(gm2.fac[1]+gm2.fac[3]))} & \Sexpr{ztrunc((gm2.fac[2]+gm2.fac[3])/length(qr$enroll))} & \Sexpr{ztrunc(glmcv2.fac$delta)} & \Sexpr{ztrunc(mean(glmiss2.fac)/length(qr$enroll))} \\ 
\end{tabular}
\end{table}

\subsection{Discussion}
Volunteer bias undermines the ability to generalize the results of any study from the sample to the population.  Previous electricity field trials have severe volunteer bias problems due to recruiting those in the treatment and control groups differently.  Although two previous studies adjusted for volunteer bias using the propensity score approach, the models they used could be improved.  This paper addressed the three problems of creating such a model: 1) finding the right predictors, 2) combining the right predictors, and 3) using only a few predictors.

Our ability to choose the right predictors had varying success.  We divided predictors into \emph{Demographics}, \emph{Constraints}, \emph{Motivations}, and \emph{Topical Interest}.  Demographics were unsuccessful at predicting enrollment intentions.  This is consistent with prior research that suggests contextual factors of the program being offered matter more than fixed factors of the volunteer, but inconsistent with approaches to volunteering or market segmentation that assume there are underlying fixed traits of participants that may make them volunteer.

In contrast with demographics, constraints or barriers to participation, as a category, predicted enrollment intentions well.  If people did not expect to be home during the day, especially from 6am-10am, then they saw no point in volunteering for the trial, indicating that the in-home display was seen as a device that they would check in the morning before going to work.  People who distrusted scientists or the utility company, two of the three major institutions involved in the trial (the other being the Department of Energy), also saw no point in enrolling.  Because people trusted scientists more than any of the other institutions, focusing on the scientific purpose and validity of the study may be effective.  The last constraint was whether participants felt they could use the device, or whether they saw it as too complicated for them to handle.  Those who reported low self-efficacy, in terms of having difficulty solving complicated problems, also reported that they did not want to enroll in the study, indicating that the in-home display was seen as complicated to use.  Our results show that examining constraints or barriers to participation in the study should be the first place to look when trying to predict volunteering.

Motivations to participate, as stable psychological factors or ``traits,'' were not consistently related to intentions to enroll.  Motivations to care more about the environment, be more energy friendly than others, and save money in general were not predictive at all.  Like demographics, each of these factors can be considered stable traits of the participant, indicating that stable personality factors that are independent of the volunteering context are unlikely to be useful predictors of volunteering.  There was some success, however, as those who enjoyed exploring new things felt as though the in-home display could satisfy that curiosity.  This desire to explore may be useful for recruiting participants broadly, but could also be an anomaly of the motivations category, as enough items were tested that one or two could be statistically significant purely by chance.

Interest in the topic was also not a great category of predictors, except for interest in the specific technology being offered, the in-home display, which was the single greatest predictor of enrollment intentions.  Thus, beliefs about benefit and interest in the specific technology being offered should be carefully measured to account for volunteer bias.  Interest in the environment or in new technologies was less predictive.

%Surprisingly, self-reports of volunteering for previous energy efficiency programs and actual willingness to help the researcher by completing an additional questionnaire were unrelated to volunteering for the program.

%The results seem to indicate that latent characteristics of people have less to do with volunteering than one would think.  In contrast, factors that have to do with the context of what was being offered, such as trust in the institutions promoting the offer (scientists and the utility company), as well as expecting to be able to use the device by being in the home in the morning, seemed to matter more.  The psychometric properties (e.g., internal consistency, uni-dimensionality) of most of the scales were adequate, but none performed well at predicting intentions to volunteer, suggesting that while these scales may be capturing specific psycho-demographics, the  `psycho-demographic' approach to predicting volunteering is itself ineffective.  Instead, people seemed to be concerned with practical issues, such as expected benefit, convenience, feasibility, and trust.

Addressing the second problem of prediction, the Classification Tree and TTB heuristic performed well on estimates of in-sample error and generalization error.  The success of the Classification Tree but not Logistic Regression suggests that the variables measured were not independent, instead interacting with each other configurally.  The failure of the equal weights rule also supports this; if independent predictors are equally weighted, they should perform much better than using only the best predictor.  Instead, most of the variables used were either redundant or worked in concert with other predictors (not independently).  

%The simple take the best heuristic and more complex classification trees performed the best, are easily interpretable, simple to conduct and communicate.  Classification trees can automatically pick up configural (interactive) relationships that would have to be manually specified for Logistic regression.  Although only two more sophisticated methods were shown here, our research initially included almost every important machine learning algorithm of the past 20 years.  However, these approaches were more complex, more difficult to use, and performed worse or much worse in terms of generalization error, sometimes dramatically overfitting the data.  Thus, the TTB heuristic and Classification Trees can be seen as the default predictive approach and natural extension for social science and behavioral prediction.  The onus is on standard (Logistic regression) or more sophisticated machine learning techniques (e.g., Support Vector Machines) to prove that they can perform better in both fitting the data, generalizing to new data, and also can be communicated to other researchers, utilities, and the public who need to use the results.

Lastly, the study suggests that a survey consisting of only five questions could fully implement the best statistical model, the Classification Tree.  These five items were expectations of benefit from the in-home display, whether one would be home in the morning, whether one trusts one's friends, whether one trusts those offering the technology, and whether one feels competent enough to use the technology.  These five questions would likely take less than one minute to complete, increasing the likelihood of non-responders returning the survey, if approached correctly and adequately incentivized.  Thus, long burdensome questionnaires eliciting large amounts of psychodemographic information are unnecessary.  While the five question survey would perform very well, the study also indicated that one would not perform much worse by having a single question survey, using the most predictive item.

%There are two important limitations to this research.  First, we did not collect data on actual volunteering for a real energy effiiency program.  Second, because we use an MTurk convenience sample, we do not have information on participants who did not volunteer for the survey.  

\section{Conclusion}
This paper demonstrates that a simple questionnaire, requiring less than one minute to complete, can be used to augment other variables that have previously succeeded in predicting enrollment in an energy efficiency trial, such as weather sensitivity \cite{george2010load}, using fans to reduce costs, the number and type of people in the household, and living in a single-family detached home \cite{sbc2006evaluation,sbc2007evaluation}.  We found that constraints on whether one can benefit from the program, interest in exploring new things, and expected benefit of the technology predict intentions to enroll well.  We also found that Classification Trees and the best predictor can easily and effectively discover the best way to combine these factors to predict enrollment intentions.

This research is limited because it only evaluates intentions to enroll in a hypothetical, but not real, trial.  Even though the data did not come from an actual recruitment, the approach we used is a valid guideline on how to develop a short set of questions that can be administered to customers via an electricity bill, short phone call, or mailing.  Using this approach can minimize volunteer bias problems, which are costly both financially and scientifically.  

%The Classification Tree without in-home display items was much more difficult to explain.  Figure~\ref{fig:treeplot3} does not include the in-home display expectations item, and shows a much more complicated classification tree.  The driving force is whether the person expects to be in the home during the day (total hours $\ge 16$).  Most of those who expected to be in the home for most of the day were willing to participate, unless they had a high monthly bill ($>\$250$), liked problem solving, were social but did not contribute to a retirement fund and had greater than a two year degree.  Looking at the left branch of the tree, among those who did not expect to be in the home more than 16 hours a day, but did trust scientists, most were willing to paricipate (39/45).  If they also did not trust scientists but talked to their friends frequently, most were willing to participate (17/21).  Only among those who were not in the home often, did not trust scientists, did not see friends often, but were employed full time was there strong unwillingness to volunteer for the trial (14/18 refused).  The classification tree was able to outperform TTB and logistic regression because it took advantage of several additional factors beyond IHD expectations.  The most important was whether people expected to be in the home during the day, especially the morning hours (six am to ten am).  Thus, even if they didn't expect to enjoy it, they would still use it.  This was further modified by their social engagement, with those less socially engaged more likely to want the IHD.  This likely indicates that those who are frequently in the home, such as parents with children, caretakes, elderly, etc, would find it useful for managing the household.  
Finally, even if people did not expect to be in the home and did not expect to enjoy the in-home display, they would still enroll if they trusted scientists, indicating that they cared about the scientific validity of the study and did not want to bias the results by failing to participate.

%\cite{sommerville2001s} \cite{norton1994characteristics} \cite{mcdonnell1979study} \cite{allsup2002difficulties} \cite{rupp2002selection} \cite{plsek2001challenge} \cite{donovan2003can} \cite{fleissig2001results} \cite{jenkins2002describing} \cite{donovan2002quality} \cite{mills2003perceptions} \cite{aaronson1996telephone} \cite{lowton2005trials} \cite{barrett2002treatment} \cite{fry2001love} \cite{liaschenko2001children} \cite{hunninghake1987recruitment} \cite{kingry2007recruitment} \cite{locker1990response} \cite{launer1994nonresponse} \cite{hoeymans1998non} \cite{herzog1988age} \cite{boersma1997characteristics} \cite{inskip2006cohort} \cite{angus2003requirement} \cite{edwards2002increasing} \cite{boynton2004administering} \cite{mccoll2001design} \cite{young2001health} \cite{woolf2000selection} \cite{stanley2003uptake} \cite{rogers1998conventional} \cite{mutch1985obtaining} \cite{prescott1999factors} \cite{watson2006increasing}

%\subsection{Predictive Performance, Posterior Predictive Checks, and Network of Models} In this section we plot and compare posterior predictions of models to data; develop network of models that tries to explain the efficacy of the different approaches. Posterior predictive checks: that is, taking the fitted model and using it to simulate replicated data, which are then compared to the observed dataset.  \url{http://andrewgelman.com/2009/02/confusions_abou/}.  A Bayesian formulation of Exploratory data anlaysis and goodness of fit testing.  Bayesian checking of the second levels of hierarchical models.  

\clearpage
\appendix
\numberwithin{table}{section}
\section{Materials}
\label{app:meastwo}

All participants first read the following introduction:

\begin{quote}
  
  Our main survey asks you to evaluate an electricity information program and answer some questions about yourself.  To receive payment you only have to answer this main survey.
  
  It would also really help us to know what you thought about the survey.  Are you willing to answer some additional questions after the main survey?  You will not be paid for these additional questions.
\begin{itemize}
  \item Sure, I'll help out once I've finished.
    \item No thanks, I'll just complete the main survey.
      \end{itemize}
\end{quote}

\begin{quote}
In this survey, we would like you to evaluate a program we are considering offering to residential electricity customers.  

Please read the description on the next page carefully and judge whether you would want to participate in this program or not, if it were offered to you.  Then, answer some questions about yourself in the main survey.  Thank you!
\end{quote}

They then read the recruitment offering:

\begin{quote}
To whom it may concern,

Scientists at Carnegie Mellon University need your help to understand the best way to provide electricity information.

You have been chosen to receive a free in-home display.  On the display you can see your home electricity use for one year.

You will evaluate the display for 1 year.  At the end of the year, you can keep it if you like it or return it for a \$25 gift certificate.  We will send you four short surveys (one every 3 months) over the year asking how useful you find the display and about your electricity use.  If you choose to participate, you will receive the frame 3 weeks from now.  There will be no cost to you and your information and survey responses will be confidential, as is university policy.

We would like you to complete the attached questionnaire, even if you are unsure about participating.  To thank you for your help, we have included a \$2 bill.  You can keep this \$2 whether or not you participate in the study.

To understand if this display could benefit every resident in the area, it is very important that the people we choose to include in the study agree to do so.  This helps us make sure that we have a representative set of participants, and is a critical part of the science we do.

If you want good, representative research to be conducted, please do participate.

If you would like to participate or to just ask questions, please do one of the following:

Call us: 1-800-111-1111 Email us: electricityframestudy@cmu.edu \\
Return the enclosed questionnaire \\
Return the enclosed postcard \\

If we don't hear from you in a week, we will give you a call.  If you don't want to participate, please call us, email us, or return the postcard checking the `no thanks' box.

Thank you,
Carnegie Mellon University Research Team
\end{quote}

The main dependent variable was their response to the following question:
\begin{quote}
  Would you enroll in this offering if it were available to you? (Y/N)
  \end{quote}

We collected standard demographics such as age, gender, employment status (full time, part time, unemployed, student, homemaker, retired), education (less than high school, high school/GED, college, associate's degree, 4 year degree, and professional degree), annual household income, race, and political affiliation.  We also asked if they were the primary billpayer, how many adults and children (under age 18) lived in the home, how many hours they spent in the home per day, and when they are usually in the home, across six periods (10am-2pm, 2pm-6pm, 6pm-10pm, 10pm-2am, 2am-6am, 6am-10am).

Next are a series of questions used to predict intentions to volunteer.  Next to each question is a code (e.g., IHD1+) that will be used to reference the item in the text, and the +/- stands for our a priori predicted direction, where + means it will be associated with increased likelihood of enrolling, and - decreased likelihood of enrolling.

Several social comparison questions were used:
\begin{quote}
  To what extent do you disagree or agree with the following statements?
  Compared to the average household in my city...
  \begin{itemize}
   \item My household has done more to reduce its electricity consumption. [more.reduce+]
     \item My household cares more about the environment. [care.environment+]
       \item My household recycles more consistently. [consistent.recycle+]
         \item My household is more active in the community. [active.community+]
           \end{itemize}
  \end{quote}

The motivation questions were (strongly disagree to strongly agree):
\begin{quote}
To what extent would the following messages encourage you to join an electricity efficiency program?
  \begin{itemize}
   \item Increasing independent energy security for the US. 
     \item Protecting the environment. 
       \item Avoid wasting energy.
         \item Increasing personal control over energy use.
           \end{itemize}
  \end{quote}

Next was the New Ecological Paradigm scale.  They were asked:
\begin{quote}
  To what extent do you disagree or agree with the following statements (strongly disagree to strongly agree):
\begin{itemize}
      \item Plants and animals exist primarily to be used by humans. [use.plants.animals-]
\item The balance of nature is very delicate and easily upset. [delicate.nature+]
\item There are limits to growth beyond which our industrialized society cannot expand. [growth.limits+]
       \item The earth is like a spaceship with only limited room and resources. [spaceship.earth+]
  
%  \item Humans have the right to modify the natural environment to suit their needs. [modify.environment-]
%    \item Plants and animals exist primarily to be used by humans. [rule.nature-]
 \end{itemize}
\end{quote}

Several new technology and eco-purchasing questions:
\begin{quote}
  To what extent do you disagree or agree with the following statements?
  \begin{itemize}
      \item I am always eager to be the first to buy a new technology. [buy.new.tech+]
        \item I understand the potential damage to the environment that some products can cause.  I do not purchase these products. [eco.purchase+]
          \item I have switched products for ecological reasons. [switch.products+]
            \item I have purchased a household appliance because it uses less electricity than other brands. [eco.appliance.buy+]
              \item When I have a choice between two equal products, I always buy the one that is less harmful to other people and the environment. [eco.equal.buy+]
                \item I only buy new products after the ones I have wear out or become obsolete. [obsolete.buy+]
                  \end{itemize}
  \end{quote}

One question asked participants to specify how much they trusted several different entities:
\begin{quote}
  How much do you trust the following groups to look out for you? (not at all, somewhat, moderately, a lot)
  \begin{itemize}
    \item The federal government [federal.trust+]
      \item Your local government [local.trust+]
       \item Scientists [scientists.trust+]
         \item Your utility company [utility.trust+]
           \item Your community [community.trust+]
             \item Your family [family.trust+]
               \item Your friends [friends.trust+]
                 \item Your co-workers [co.workers.trust+]
                   \end{itemize}
  \end{quote}


The behavioral volunteering items:
\begin{quote}
 In the past 12 months have you done any of the following?
 \begin{itemize}
       \item Bought Compact Fluorescent Lights (CFLs) [cfl+]
      \item Used an electricity tracking device (e.g., an in-home display) [tracking.device+]
        \item Bought one or more energy efficient appliances [efficient.appliances+]
          \item Insulated my home [insulated.home+]
            \item Got a flu shot [flu.shot+]
              \item Recycled [recycled+]
                \item Contributed to a retirement savings (e.g., 401k) [401k+]
                  \item Used the public library [library+]
                    \item Enrolled in prize drawings [prize.drawings+]
                      \item Donated time to a charity or non-profit organization [donated.time+]
                        \item Donated money to a charity or non-profit organization [donated.money+]
                          \item Bought a lottery ticket [lottery.ticket+]
   \item Signed a petition [signed.petition+]
     \item Attended a protest or demonstration [protest.demonstration+]
       \item Voted in a local or national election [voted.election+]
         \item Attended a group meeting (e.g., Neighbourhood Watch, parent-teachers' association, social clubs, recreational groups, professional organizations, etc.). [group.meeting+]
           \end{itemize}
 \end{quote}

They were then asked several questions about their awareness of technology:
\begin{quote}
  Do you currently have a smart meter in your home? (Yes/No/Don't know) [have.meter+] \\
  Have you heard of in-home electricity displays before this survey? (Yes/No/Don't know) [heard.ihd+] \\
  To what extent do you agree or disagree with the following statement? (strongly disagree to strongly agree)
\begin{enumerate}
\item An in-home display would help me save electricity each month. [ihd.save.electricity+]
  \item An in-home display would help me save money each month. [ihd.save.money+] 
   \item I would enjoy having an in-home display in my home. [ihd.enjoy+] 
     \item I would learn a lot from an in-home display. [ihd.learn+]
\end{enumerate}

\end{quote}

The next questions were on self-efficacy:

\begin{quote}
  To what extent do the following statements describe you? (not at all true, slightly true, somewhat true, very true, extremely true)
  \begin{itemize}
    \item If something looks too complicated I will not even bother to try it. [too.complicated-]
      \item When I decide to do something, I go right to work on it. [right.to.work+]
        \item I do not seem capable of dealing with most problems that come up in my life. [not.capable-]
          \item When unexpected problems occur I don't handle them very well. [unexpected.problems-]
            \item When I make plans, I am certain I can make them work. [make.plans.work+]
              \end{itemize}
  \end{quote}
  
There were then several questions on curiosity and exploration:
\begin{quote}
  To what extent do the following statements describe you? (not at all true, slightly true, somewhat true, very true, extremely true)
  \begin{itemize}
    \item I like to read books by writers I've not come across before. [new.writers+]
      \item I like to try to solve problems that present a mental challenge. [enjoy.mental.challenge+]
        \item I like to work at a problem until I get it right. [like.work.problem+]
          \item I like to look up new words in a dictionary. [new.words+]
            \item I am always eager to know more about the universe we live in. [eager.to.know+]
              \item When I hear about a new subject I like to find out more about it. [new.subject+]
                \end{itemize}
  \end{quote}

The next set of questions pertained to frugality:
\begin{quote}
To what extent do you agree or disagree with the following statements?
  \begin{itemize}
\item If you take good care of your possessions, you will definitely save money in the long run. [care.possessions+]
  \item There are many things that are normally thrown away that are still quite useful. [no.throw.away+]
    \item Making better use of my resources makes me feel good. [better.use.resources+]
      \item If you can re-use an item you already have, there's no sense in buying something new. [reuse.items+]
        \item I believe in being careful in how I spend my money. [careful.spending+]
          \item I discipline myself to get the most from my money. [discipline.money+]
            \item I am willing to wait on a purchase so that I can save money. [wait.purchase+]
              \item There are things I resist buying today so I can save for tomorrow. [resist.buying+]
                \end{itemize}
  \end{quote}

There was a final question about socialization:
\begin{quote}
  Last of all, the following questions ask about how many people you see or talk to on a regular basis. (1, 2, 3, 4, 5, 6, 7 or more).
  \begin{itemize}
    \item How many close friends do you have? (meaning people that you feel at ease with, can talk to about private matters, and can call on for help). [close.friends+]
      \item How many of these friends do you see or talk to at least once every 2 weeks? [talk.two.weeks+]
        \item How many times have you attended a party or other social gathering in the past 2 months? [party.gathering+]
          \end{itemize}
  \end{quote}

\clearpage
\bibliographystyle{unsrt}
\bibliography{/home/alex/Dropbox/masterbib}

\clearpage
\section*{Acknowledgements}
All materials and data, including completely reproducible statistical analyses in Sweave, can be obtained from the first author's Dataverse\footnote{\url{http://hdl.handle.net/1902.1/19154}}. 

We thank Jay Apt and Baruch Fischhoff for their thoughtful comments and support.

This work was supported by the Center for Climate and Energy Decision Making (SES-0949710), through a cooperative agreement between the National Science Foundation and Carnegie Mellon University.

This material is based upon work supported by the Department of Energy under Award Numbers DE-OE0000300 and DE-OE0000204.  Disclaimer: This report was prepared as an account of work sponsored by an agency of the United States Government. Neither the United States Government nor any agency thereof, nor any of their employees, makes any warranty, express or implied, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately owned rights. Reference herein to any specific commercial product, process, or service by trade name, trademark, manufacturer, or otherwise does not necessarily constitute or imply its endorsement, recommendation, or favoring by the United States Government or any agency thereof. The views and opinions of authors expressed herein do not necessarily state or reflect those of the United States Government or any agency thereof.

\end{document}

TTB is similar to lexicographic strategies (Payne, Bettman, and Johnson, 1993); elimination by aspects (Tversky, 1972); Relevance theory (Sperber, Cara, and Girotto, 1995); Optimality theory (Prince and Smolensky, 1991).  

The recruitment described the PowerCost Monitor as follows:

To help participants better manage their electricity consumption, we will be offering a number of the pilot participants a free Power Cost monitor valued at \$150. If you are selected to participate in the pilot and then become one of the participants selected to receive the monitor, you will be able to see your electricity consumption on a real-time basis. The monitor also makes it possible to track electricity consumption during the month. Since quantities are limited, the Power Cost monitors will be offered on a first-come-first-serve basis to customers who are selected to participate in the pilot.

The rest of the document is shown below:

Hydro One Recruitment Letter
Subject: Time-of-Use Pilot Project
Dear : 

As electricity conservation becomes more vital to sustaining a reliable supply of electricity in Ontario, Hydro One Networks Inc. (``Hydro One'') is committed to working with you to develop effective conservation and demand management programs. We applied to the Ontario Energy Board (OEB) and received their approval to undertake a time-of-use (TOU) pricing pilot for about 500 customers from May 1 to September 30, 2007, to study how TOU rates affect the way people use electricity. 

As one of Hydro One's first customers to have a smart meter installed, we are pleased to invite you to participate in this pilot project. If you are eligible to participate in this project, you will have the opportunity to see how much energy you can shift and save under the TOU rates. 

How does the pilot project work? 

Pilot participants will pay the OEB-approved TOU energy rates for five months (May through September, 2007) instead of the current Regulated Price Plan (RPP) energy rates (see the table below for comparison). Please note that the TOU rates pertain only to the electricity commodity prices and will not affect other charges on your electricity bill. During the pilot period, participants will receive a monthly bill clearly showing their electricity consumption differentiated by the TOU rate calculation. After September 30, 2007, participants will return to the regular RPP rates that they are now paying and to their usual billing arrangements. TOU rates during the pilot period will encourage participants to shift electricity consumption from the more expensive on-peak period to the less expensive off-peak period, resulting in lower electricity payments for the same consumption.

How will the TOU pilot benefit participants? 

Participants in the pilot will be able to take advantage of the lower off-peak electricity rates by switching their use from peak hours to off-peak hours. For instance, they will be able to save money by running the dishwasher during off-peak hours and by doing laundry on the weekends. Some participants who do not shift enough of their usage to off-peak hours may actually see an increase in their energy bill during the pilot. 

To help participants better manage their electricity consumption, we will be offering a number of the pilot participants a free Power Cost monitor valued at \$150. If you are selected to participate in the pilot and then become one of the participants selected to receive the monitor, you will be able to see your electricity consumption on a real-time basis. The monitor also makes it possible to track electricity consumption during the month. Since quantities are limited, the Power Cost monitors will be offered on a first-come-first-serve basis to customers who are selected to participate in the pilot. 

Pilot participants will also receive an energy efficiency kit with two compact fluorescent lights, a timer and energy saving tips. To help pilot participants better understand their electricity consumption patterns during the pilot, they will have access to a website showing their daily consumption profiles, which will be updated on a weekly basis. 

How can you apply to participate? 

Over the next two weeks, you may be contacted by Hydro One staff to determine your eligibility to participate in the TOU pilot. Please note that participation in the pilot is completely voluntary. If you are interested in participating or have any questions regarding this pilot, please call us at 1-866-258-8333 during office hours. This is a toll-free phone number specifically set up for the pilot. Alternatively, you can send an email to LoadResearch@HydroOne.com. 

If you are selected to participate in the pilot, you will need to sign an agreement with Hydro One Networks Inc., agreeing to participate and to pay the TOU rates during the pilot period. You will also be asked to fill out two questionnaires, one at the beginning of the pilot and the other at the end of the pilot, to provide further information to help the project team better understand the reasons for potential changes in your consumption patterns. 

Thanks in advance for your interest. 
Hydro One TOU Pilot Team

There are three goals for this paper.  The first is to create a simple questionnaire that can be used to accurately predict who will volunteer in an energy efficiency program, using  in-home displays as an example.  We do this by constructing several questionnaires based on previous research on volunteering \cite{rosenthal1975volunteer}, and then evaluating the predictive and psychometric \cite{devellis2011scale,loehlin2004latent,demars2010item,spector1992summated} properties of these questionnaires.  The second goal is to develop a model based on this questionnaire that can be broadly applied to volunteer problems in energy efficiency research.  The third goal is to demonstrate a method for developing predictive social science models of human behavior.  In particular, when developing our predictive models we pay careful attention to actual prediction, rather than fitting the data.  This sensitivity comes from knowledge of the psychology of prediction and explanation, as people believe that predictions are more certain than they really are \cite{soyer2012illusion}, and it is possible to fit the data well by fiddling with the predictors and sample to get a desired result \cite{leamer1983let,fischhoff1982those}.  To do this, we benchmark more complex machine learning algorithms against simpler models that usually outperform complex models \cite{armstrong1985crystal,dana2004superiority,gigerenzer1999betting}, using cross-validated and bootstrapped error metrics for estimates of generalization error \cite{efron1993introduction}, and also compare the ability of the different models to predict real, independently sampled, test data.

\section{Prevalence of Volunteer Bias}

Aside from four exceptions, almost every field study on the topic of in-home displays has suffered from some form of volunteer bias.\footnote{This does not merely apply to in-home display studies, but we focus on them here to keep the discussion shorter.  Davis \emph{et al.} provide additional references \cite{davis2012setting}.}  The first exception, the Polk's Landing study \cite{mcclelland1979energy}, had displays installed in homes before people bought them, with no way for buyers to know which homes had the displays beforehand.  The Southern California Edison study \cite{sexton1987consumer} used an opt-out design with an opaque opt-out procedure, resulting in no opt-outs.  The Commonwealth Edison Energy Smart Pricing Pilot with Pricelight study \cite{sbc2006evaluation,sbc2007evaluation} explicitly modeled selection using a propensity score approach, and found that volunteers were more likely to live in single-family homes, with fewer household members, higher incomes, and more internet access than those who chose not to participate.  PG\&E's Smart-Rate Pilot \cite{george2010load} also used a propensity score adjustment for volunteer bias.

The usual volunteer bias, where people self-select into the study and are then randomly assigned to condition, leads to uncertainty in inferences from sample to population.  The BC Hydro PowerCost Monitor Time-of-Use pilot \cite{sulyma2008experimental} used single family dwellings in British Columbia (lower mainland, Vancouver Island, the North) with an opt-in design.  Those recruited were more educated (with university degrees) and had higher annual household income compared to the BC population of single detached homes.  The pilot participants were also more knowledgeable about electricity conservation, more active in trying to save energy, more willing to change habits, and used on average 1700 kWh less than other single family dwellings in the area.  This study fits into the `usual' volunteer bias category because random assignment occurred \emph{after} participants opted in, thus allowing valid comparisons between groups in the sample.

A second type of volunteer bias, where studies recruit the control and treatment groups differently, not only causes uncertainty in inference from sample to population, but also between groups within the sample.  This means that any comparison between treatment and control groups cannot separate the effectiveness of the treatment (the in-home display) from differences between how samples were obtained.  

Almost every field study of in-home displays succumbs to this more severe form of volunteer bias.  The Omaha Public Power study \cite{eiden2009investigation} contacted 2000 members of an existing pilot study by phone or email, resulting in 199 eligible respondents and 151 successful recruits.  The control group was not recruited at all.  Instead 95 homes were taken as a representative sample based on load forecasts.  The Florida Power and Light Energy Detective study \cite{parker2008pilot} recruited participants based on interest in the study, providing a free installation and device (The Energy Detective) to 22 homes.  The control group was a non-volunteer sample of Florida Power and Light customers.  The Milton Hydro Direct Energy Smart Home Energy Conservation Kit study \cite{schembri2008influence} recruited participants through telephone, direct mail, and billing inserts.  Eligible participants were 18 years or older, who lived in the home for at least one year and did not plan to move out, and would be willing to complete two surveys.  Once they expressed interest and registered online, participants were contacted for an installation appointment based on the order they registered, resulting in 108 homes having an in-home display installed for free.  The control group consisted of 23 customers out of 300 recruited who lived in geographic clusters near the treatment group homes, that were judged to be similar in size and age of the home to that in the treatment group, and who completed a survey for a \$100 gift certificate.  The Oberlin TED5000 study \cite{allen2006effects} recruited 60 households in Oberlin, Ohio who completed a door-to-door energy survey.  Half of those recruited were in a high income neighborhood, and the other half were in a low income neighborhood.  Five high and five low income households were invited to participate based on who was home and who agreed to participate.  
The control group consisted of the remaining 50 households that were either not contacted or explicitly refused to participate.  The Ontario Energy Board Hydro One pilot \cite{frank2008hydro} used participants selected randomly from a stratified sample of 23,000 customers.  They sent 3,100 invitations, contacted 2,700 by telephone, eventually ending up with 411 customers (13\% recruitment).  Those who were recruited knew the treatment they would receive when they decided to volunteer, and the control group was recruited separately.  The Energy Trust of Oregon PowerCost Monitor study \cite{sipe2009net} allowed 164 participants to purchase the PowerCost Monitor on a website for \$29.99 and self-install, and also installed 201 devices during home energy audits.  Control groups were sampled either from the general Oregon population based on several stratification variables (e.g., weather) or sampled from those who had home energy audits but did not receive the in-home display.  Thus, neither control group volunteered, but both treatment groups did.  The Baltimore Gas and Electric Smart Energy Pricing Pilot with the Energy Orb \cite{faruqui2009bge} randomly sampled 5,000 customers, mailed information about the pilot inviting them to join, and allowed them to respond by email or phone.  Participants were first sent a flyer specific to the treatment program with program rate, potential critical peak hours, and tips for reducing electricity to save money.  They were then followed up with a recruitment phone call with more information and a chance to sign up.  If participants did not respond they were contacted by phone.  The study recruited 117/440 customers ($\sim$27\%)  with 33 (28\%) dropouts due to 24 changing their mind, 4 move outs, 3 technical issues, and one with a tenant dwelling.  In their interval meter test sample they recruited 33/200 (16.5\%) with 5 (15\%) dropouts.  They were offered \$150 or \$100 to participate upon completing the program.  
They then assigned those who were not contacted or refused to participate to the control group.

There is also an intermediate case where random assignment occurs \emph{before} volunteering.  For example, Connecticut Light and Power \cite{faruqui2009connecticut} attempted to enroll 117 customers for each treatment condition with over-enrollment in expectation of move-outs and opt-outs.  Customers were randomly assigned to condition before volunteering.  Customers were assigned to a marketing wave to avoid early responder bias, were given two weeks to respond to the pilot invitation through direct mail or calls, and if they didn't respond they were not part of the study and the next wave started (waves every 2 weeks).  The uptake rate through direct mail and phone calls was 3.1\%.

%Intuitive vs. Machine (optimal) vs. Random; Crowdsourced paramorphic bootstrap models \& TTB \& Dawes' Eq. Weights; Recursive partitioning \& MLE Logistic.  Have people/experts rank what are going to be the best predictors or the best weights vs. use model selection with machine learning vs. random choice benchmark. Have them make probability estimates on how likely they think their judgment is correct for each prediction.  Random weights vs. random predictors vs. optimal weights vs. intuitive weights. vs. intuitive predictors  This document will describe our research on predicting volunteerism. [Tamar insert intro here]

\section{Recruitment to Minimize Volunteer Bias}
The first line of defense against volunteer bias is good experimental design, as whether people will volunteer depends on the approach taken to recruit them.\footnote{Aside from the recruitment approach, participants must be randomly assigned to experimental conditions \emph{after} they decide to volunteer.}  For example, people are swayed by whether they believe not participating could bias the study results \cite{williams2007no}, they are more likely to participate if visited in the home, and minor adjustments to the training of recruiters (reviews, training, advice, and feedback) can increase recruitment levels dramatically \cite{donovan2009development}.  

Possibly the most important element of the recruitment approach is how non-response (rather than explicit refusal) is interpreted.  Broadly, there are two possibilities: opt-out and opt-in.  An opt-out design includes all sampled households unless they choose not to participate.  Participants are contacted with an information leaflet describing the aims of the study and can opt-out by returning a postcard.  If the postcard is not returned in two weeks they are contacted via telephone, and are entered into the study unless an explicit ``no'' response occurs \cite{williams2007no,junghans2005recruiting}.  Opt-out allows generalization of study results to all consumers.  In the opt-in design, participants are sent an information leaflet and are asked to reply with a postcard.  If they reply they are then called on the phone to further arrange participation, but are not contacted otherwise, and are not entered into the study \cite{junghans2005recruiting}.  An opt-in design allows generalization to consumers who are explicitly willing to participate in the study.

The main difference between the two approaches is how they interpret a customer's non-response to an invitation to participate.  Opt-out assumes that non-response indicates the participant has yet to decide whether to participate, or that non-response is consent.  Opt-in assumes that non-response is refusal to participate.

The empirical evidence from biomedical research supports the former conclusion rather than the latter; that is, non-response is \emph{not} refusal.  Failures to volunteer are largely due to the feeling that it is ``burdensome'' to opt-in \cite{junghans2005recruiting}.  For example, the majority of people who didn't respond to a medical study recruitment but did respond to a follow-up questionnaire asking why they refused, said they were interested in the research but didn't participate because of perceived ineligibility, privacy concerns, misunderstanding of the research, and other personal reasons \cite{williams2007no}.  This is consistent with the finding that those who have higher perceived (and real) barriers are less likely to respond, such as those who are in poorer health, older, and lower socio-economic status.

Evidence from demand-response studies using in-home displays supports the same conclusion.  In the Hydro One pilot \cite{frank2008hydro} some refusals were based on the feeling that study participants would be told ``when and how to use electricity.''  Most (77\%) non-participants in the PowerCost Monitor Evaluation Study \cite{norton2008powercost} were contacted through direct mail but didn't recall receiving an offer.  However, 94\% of those who were offered the PCM during a home energy audit (National Grid) accepted, but only 14\% of previously audited customers who were offered a monitor through the mail accepted (WMECO), indicating the inconvenience of having to respond to a mailing was the driving force of failure to recruit.  In the ComEd Energy Smart Pricing Pilot \cite{sbc2006evaluation,sbc2007evaluation}, about half of those recruited did not respond to a survey because they didn't remember hearing about it, believed they were disqualified based on technical issues, believed they were disqualified because they participated in other programs, perceived price risk or lack of savings benefits, or saw the programs as too complex.

Aside from whether non-response should be interpreted as consent or refusal, the evidence for the benefit of opt-out to increase recruitment rates is voluminous.  One systematic review of medical research by Treweek \emph{et al.} \cite{treweek2010strategies} found an increase in recruitment by 39\% on average (95\% CI [6\% to 84\%]).  Junghans \emph{et al.} \cite{junghans2005recruiting} found a 38\% (96/252) clinic attendance for opt-in, and 50\% attendance (128/258) for opt-out.  A Scottish survey of 10000 adults by their National Health System reached only 20\% response rate with opt-in when other surveys in the same area previously achieved 70-80\% response rates with opt-out \cite{hewison2006overcoming}, leading them to the conclusion that ``bias and reduced response rates under an opt-in system are not just plausible but predictable.''
\section{Adjusting for Volunteer Bias}

Even if one follows current best practices for recruitment (e.g., \cite{treweek2010strategies,dillman2007mail})\footnote{These include: 1) monetary incentive, 2) recorded delivery, 3) teaser on the envelope, 5) interesting topic; 6) pre-notification; 7) follow-up contact; 8) unconditional incentives; 9) shorter questionnaire; 10) second copy of questionnaire at follow-up; 11) mentioning an obligation to respond; 12) university sponsorship; 13) non-monetary incentives; 14) personalized questionnaires; 15) hand-written addresses; 16) stamped rather than franked return envelopes; 17) assurance of confidentiality; 18) first class outward mailing; 19) statement that others had responded; 20) lottery with immediate notification of results; 21) a simple header; 22) textual representation of response categories; 23) a deadline; 24) picture included in an email.  However, some approaches increase non-response, including mentioning ``survey'' in an email subject line and including a male signature \cite{edwards2009methods}.} some proportion of those recruited will not participate.  Fortunately, if one can create a model of the process of volunteering, and predict who volunteers, then the risk of incorrect generalization from sample to population can be minimized.  One simple approach is to use \emph{propensity score adjustment} \cite{wooldridge2009introductory,gelman2007data}, that explicitly models each participant's probability of volunteering.  For example, the Commonwealth Edison Energy Smart Pricing Pilot \cite{sbc2006evaluation,sbc2007evaluation} was marketed to members of the Community Energy Cooperative, yielding 750 customers.  A propensity score model with Inverse Mills ratio was used to determine that participant selection did not affect estimates of peak energy reduction.  PG\&E's Smart-Rate Pilot \cite{george2010load} also used propensity score matching to address differences between those who did and did not volunteer.

Unfortunately, developing a good model of volunteering is extremely difficult for three reasons: (1) finding the right predictors, (2) combining the right predictors, and (3) recruiting non-volunteers to train the model.  

First, it is difficult to find the right predictors of volunteering.  
[Tamar: set up the measures we use here]
The problem of volunteer bias has been acknowledged across the social sciences, and research on volunteer bias extends back more than 80 years \cite{rosenzweig1933experimental,edgerton1947objective,wallin1949volunteer}.  In 1975, Rosenthal and Rosnow \cite{rosenthal1975volunteer} published a thorough review of the evidence of those ways in which volunteer subjects systematically differed from their non-volunteer counterparts.  The demographic factors most commonly identified as being consistently related to differences in frequency of volunteering include education and socioeconomic status (with more highly educated and higher SES groups being more likely to volunteer) and sex (with women volunteering more often than men).  Psychological factors include approval motivation and sociability: those who are more motivated by approval, as well as those who are more sociable, are more likely to volunteer.

Much of the work on volunteer bias has been conducted with university student research pools. In identifying bias among volunteers for real-world initiatives, additional factors may play a role. The appeal and relevance of the specific initiative being presented, for example, may be as strong or stronger a predictor than any given demographic factor.  For example, volunteers for research on sexual behavior have been shown to have greater concerns about sexual functioning and higher incidences of sexual problems \cite{nirenberg1991volunteer}.

Other evidence from more real-world volunteering situations suggests that factors such as proficiency in the area of volunteering predict enrollment in a specific program \cite{callahan2007volunteer}.  In addition, volunteering for studies may in part be determined by individual differences in altruism, with volunteering being viewed as a  pro-social act on the part of the volunteer (insert cites). Volunteering for an energy initiative that may result in some financial benefit, however, may be far less likely to be viewed by the participant as an altruistic act. In the following studies, we take into account those demographic and psychological factors that have been shown to predict volunteering in a more traditional academic setting, as well as factors that may motivate volunteering for a specific and personally beneficial energy initiative.

\footnote{The Oberlin TED5000 study \cite{allen2006effects} recruited 60 households in Oberlin, Ohio who completed a door-to-door energy survey.  Half of those recruited were in a high income neighborhood, and the other half were in a low income neighborhood.  Five high and five low income households were invited to participate based on who was home and who agreed to participate.  The control group consisted of the remaining 50 households that were either not contacted or explicitly refused to participate.  The Ontario Energy Board Hydro One pilot \cite{frank2008hydro} used participants selected randomly from a stratified sample of 23,000 customers.  They sent 3,100 invitations, contacted 2,700 by telephone, eventually ending up with 411 customers (13\% recruitment).  Those who were recruited knew the treatment they would receive when they decided to volunteer, and the control group was recruited separately.  The Energy Trust of Oregon PowerCost Monitor study \cite{sipe2009net} allowed 164 participants to purchase the PowerCost Monitor on a website for \$29.99 and self-install, and also installed 201 devices during home energy audits.  Control groups were sampled either from the general Oregon population based on several stratification variables (e.g., weather) or sampled from those who had home energy audits but did not receive the in-home display.  Thus, neither control group volunteered, but both treatment groups did.  The Baltimore Gas and Electric Smart Energy Pricing Pilot with the Energy Orb \cite{faruqui2009bge} randomly sampled 5,000 customers, mailed information about the pilot inviting them to join, and allowed them to respond by email or phone.  Participants were first sent a flyer specific to the treatment program with program rate, potential critical peak hours, and tips for reducing electricity to save money.  They were then followed up with a recruitment phone call with more information and a chance to sign up.  
If participants did not respond they were contacted by phone.  The study recruited 117/440 customers ($\sim$27\%)  with 33 (28\%) dropouts due to 24 changing their mind, 4 move outs, 3 technical issues, and one with a tenant dwelling.  In their interval meter test sample they recruited 33/200 (16.5\%) with 5 (15\%) dropouts.  They were offered \$150 or \$100 to participate upon completing the program.  They then assigned those who were not contacted or refused to participate to the control group.  There is also an intermediate case where random assignment occurs \emph{before} volunteering.  For example, Connecticut Light and Power \cite{faruqui2009connecticut} attempted to enroll 117 customers for each treatment condition with over-enrollment in expectation of move-outs and opt-outs.  Customers were randomly assigned to condition before volunteering.  Customers were assigned to a marketing wave to avoid early responder bias, were given two weeks to respond to the pilot invitation through direct mail or calls, and if they didn't respond they were not part of the study and the next wave started (waves every 2 weeks).  The uptake rate through direct mail and phone calls was 3.1\%.  Omaha Public Power contacted 2000 members of an existing pilot study by phone or email, resulting in 199 eligible respondents and 151 successful recruits.  The control group was not recruited at all.  Instead 95 homes were taken as a representative sample based on load forecasts.  The Florida Power and Light Energy Detective study \cite{parker2008pilot} recruited participants based on interest in the study, providing a free installation and device (The Energy Detective) to 22 homes.  The control group was a non-volunteer sample of Florida Power and Light customers.}





Expectations of the in-home display strongly predicted willingness to volunteer.  After accounting for expected enjoyment and savings from the IHD, there was little power for additional predictor variables to improve the model.  Other variables were related, including the NEP scale.  However, they were not nearly as strong as the IHD expectations (WHY?).

[We can theorize here. Contextual variables are important a la previous research. Also, because motivations may be conflicting (this is a separate analysis), could be why expectations trump all else.] 

Based on the literature, some variables that we might expect to predict volunteering did not.  One instance was the behavioral measure of volunteering.  One would expect that an actual behavior, willingness to continue the survey to help the researcher, would be strongly related to other volunteering behaviors, including volunteering for energy efficiency.  Surprisingly, this was not the case (why?).

[Here's why some things might not work out (see intro). Volunteering: not a purely altruistic situation. Some financial/personal benefit to be had.  Conflicting motivations could lead to crowding out. Money vs environment.  Winner will likely be anticipated immediate tangible benefits.]

In terms of modeling, the simpler Take the Best heuristic performed as well as or better than all of the other approaches.  The Logistic regression and Classification Tree approaches performed similarly.  The similarity in predictive ability of these approaches occurs because they all gave the largest weight to the best predictor, the expectation of enjoyment of the IHD.  Thus, this is consistent with previous research showing social science contexts involve multiple correlated predictors that are unlikely to perform better than the best predictor.

An alternative interpretation is that we were just poor at creating and picking good predictors, in the sense that the variables independently predict intention to enroll, or that they interact with each other configurally, with the predictive ability of one variable depending on the values of other variables.  The inability of human judges to pick independent and configural predictors is well known.  To rule out this possibility, Study Two expands the set of predictors to include additional validated scales that would address prediction from multiple independent dimensions (e.g., volunteering, purchasing, social perceptions).

[Write more about why we need NEW measures. How about a measure that actually captures behavior as opposed to self-report and some variables we think will be independent. MAKE THIS EXPLICIT.] 

\footnote{We also compared how the mention of the supporting institution, either the Department of Energy (DOE), scientists, utility company, or a required subscription from the IHD vendor, affected willingness to participate.  The highest proportion of intended enrollments was in the scientists and DOE condition ($M=$ \Sexpr{ztrunc(mean(scientists.doe.m))}, $SE=$ \Sexpr{ztrunc(sd(scientists.doe.m))}).  This was significantly higher than the next highest condition, scientists alone ($M=$ \Sexpr{ztrunc(mean(scientists.m))}, $SE=$ \Sexpr{ztrunc(sd(scientists.m))}), by about 14\% ($t(204)=2.00, p=0.05, d=0.14$).  Scientists alone was followed by scientists and a subscription ($M=$ \Sexpr{ztrunc(mean(scientists.subscription))}, $SE=$ \Sexpr{ztrunc(sd(scientists.subscription))}), followed by scientists and the utility ($M=$ \Sexpr{ztrunc(mean(scientists.pepco.m))}, $SE=$ \Sexpr{ztrunc(sd(scientists.pepco.m))}), and lastly scientists and utility with program offerings from the utility ($M=$ \Sexpr{ztrunc(mean(scientists.pepco.offers.m))}, $SE=$ \Sexpr{ztrunc(sd(scientists.pepco.offers.m))}).}   

% NOTE(review): these rows were sitting outside any tabular environment, which
% produces ``Misplaced alignment tab character &'' errors.  They are wrapped here
% using the same column layout and header as Table tab:facs1; the rows reference
% the vmoo2 data -- confirm the intended placement, caption, and label.
\begin{table}[hp]
  \centering
\scalebox{0.87}{
  \begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Loading & $\alpha$ \\ \hline
\emph{IHD Expectations} &  &  &  \\
I would enjoy having an IHD in my home. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.enjoy,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.enjoy,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[1,1])} &  \\ 
An in-home display would help me save electricity each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.electricity,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.electricity,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[2,1])} &  \\ 
An IHD would help me save money each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.money,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.save.money,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[3,1])} &  \\ 
I would learn from an IHD. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$ihd.learn,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo2$ihd.learn,as.numeric(vmoo2$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD2[4,1])} &  \\ 
IHD Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(-SF.IHD2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(-SF.IHD2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.IHD2[1])}\% & \Sexpr{ztrunc(alpha.ov.IHD2)} \\ 
& & & \\
\emph{NEP} &  &  &  \\
Plants and animals exist primarily to be used by humans. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo2$use.plants.animals,as.numeric(vmoo2$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo2$use.plants.animals,as.numeric(vmoo2$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(-L.NEP2[1,1])} &  \\ 
delicate.nature & \Sexpr{ztrunc(cor.test(vmoo2$delicate.nature,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$delicate.nature,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[2,1])} &  \\ 
There are limits to growth beyond which our industrialized society cannot expand. & \Sexpr{ztrunc(cor.test(vmoo2$growth.limits,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$growth.limits,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[3,1])} &  \\ 
The earth is like a spaceship with only limited room and resources. & \Sexpr{ztrunc(cor.test(vmoo2$spaceship.earth,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo2$spaceship.earth,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP2[4,1])} &  \\ 
NEP Factor & \Sexpr{ztrunc(abs(cor.test(-SF.NEP2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{ztrunc(abs(cor.test(-SF.NEP2$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.NEP2[1])}\% & \Sexpr{ztrunc(alpha.ov.NEP2)}  \\ 
& & & \\
%
%Age &              \Sexpr{ztrunc(cor.test(vmoo2$age,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo2$age,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(mean(vmoo2$age,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo2$age,na.rm=TRUE))} \\
%Gender &      & \Sexpr{ztrunc(mean(as.numeric(vmoo2$gender)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo2$gender)-1,na.rm=TRUE)))} \\ 
%Race (Asian, Black, Hispanic, Native American, White) &       \Sexpr{prettyNum(as.numeric(chisq.test(vmoo2$race,vmoo2$enroll)[1]))}          (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$race,vmoo2$enroll)[3]))}) & &  \\ 
%Political (Democrat, Independent, Republican) &  \Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$political,vmoo2$enroll)[1]))}     (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$political,vmoo2$enroll)[3]))}) & &  \\ 
%Education &  \Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$education,vmoo2$enroll)[1]))}     (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$education,vmoo2$enroll)[3]))}) & &  \\ 
%Employment & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$employment,vmoo2$enroll)[1]))}    (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo2$employment,vmoo2$enroll)[3]))}) & &  
%$
%
%Aggregating for three different time periods, Total hours $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$total.hours,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$total.hours,vmoo2$enroll)[3])}, morning hours $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$morning.hours,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$morning.hours,vmoo2$enroll)[3])}, and evening hours $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$evening.hours,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$evening.hours,vmoo2$enroll)[3])}.
%
%Three volunteering behaviors not shown in these tables were also significantly related to intentions to enroll, previously owning an energy tracking device $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$tracking.device,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$tracking.device,vmoo2$enroll)[3])}, prize drawings $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$prize.drawing,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$prize.drawing,vmoo2$enroll)[3])}, protest or demonstration $\chi^{2}(1)=$ \Sexpr{prettyNum(chisq.test(vmoo2$protest.demonstration,vmoo2$enroll)[1])}, $p=$ \Sexpr{prettyNum(chisq.test(vmoo2$protest.demonstration,vmoo2$enroll)[3])}.
%
%$
%the overall internal consistency was $\alpha$ = \Sexpr{ztrunc(alpha.ov.trust2.red)}, and the factor loadings on the first factor were <Local, Scientists, Utility, Co-Workers> = $< \Sexpr{ztrunc(-L.trust2[1,1])},\Sexpr{ztrunc(-L.trust2[2,1])},\Sexpr{ztrunc(-L.trust2[3,1])},\Sexpr{ztrunc(-L.trust2[4,1])}>$.
%The trust scale had an overall internal consistency was high ($\alpha$ = \Sexpr{ztrunc(alpha.ov.trust2)}).  
%A Principal Components Analysis shows that one factor accounts for \Sexpr{prettyNum(100*vars.trust2[1])}\% of the variance, and a second factor accounts for \Sexpr{prettyNum(100*vars.trust2[2])}\% of the variance.  
%
\emph{Social Comparison} &  &  &  \\
My household has done more to reduce its electricity consumption. & \Sexpr{ztrunc(cor.test(vmoo2$more.reduce,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$more.reduce,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.socom[1,1])} &  \\ 
My household cares more about the environment. & \Sexpr{ztrunc(cor.test(vmoo2$care.environment,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo2$care.environment,as.numeric(vmoo2$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.socom[2,1])} &  \\ 
My household recycles more consistently. & \Sexpr{ztrunc(cor.test(vmoo2$consistent.recycle,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(abs(cor.test(vmoo2$consistent.recycle,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.socom[3,1])} &  \\ 
My household is more active in the community. & \Sexpr{ztrunc(cor.test(vmoo2$active.community,as.numeric(vmoo2$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo2$active.community,as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.socom[4,1])} &  \\ 
Social Comparison Factor & \Sexpr{ztrunc(abs(cor.test(SF.socom$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$estimate))} (\Sexpr{prettyNum(abs(cor.test(SF.socom$scores[,1],as.numeric(vmoo2$enroll),method="kendall")$statistic))}) & \Sexpr{prettyNum(100*vars.socom[1])}\% & \Sexpr{ztrunc(alpha.ov.socom)} \\ \hline
\end{tabular}}
\end{table}



These include: 1) monetary incentive; 2) recorded delivery; 3) teaser on the envelope; 5) interesting topic; 6) pre-notification; 7) follow-up contact; 8) unconditional incentives; 9) shorter questionnaire; 10) second copy of questionnaire at follow-up; 11) mentioning an obligation to respond; 12) university sponsorship; 13) non-monetary incentives; 14) personalized questionnaires; 15) hand-written addresses; 16) stamped (rather than franked) return envelopes; 17) assurance of confidentiality; 18) first class outward mailing; 19) statement that others had responded; 20) lottery with immediate notification of results; 21) a simple header; 22) textual representation of response categories; 23) a deadline; 24) picture included in an email.

However, some approaches increase non-response, including mentioning ``survey'' in an email subject line and including a male signature. 

\section{Study One}
Study One asked participants to state their intentions to enroll in a hypothetical in-home display trial.  Following elicitation of their enrollment intentions, participants completed a questionnaire that included measures previously demonstrated to be associated with volunteering, as well as measures that we believed would be related to volunteering in the context of this trial.  The psychometric properties and predictive validity of these measures were then evaluated along with multivariate prediction models of stated intention to volunteer.  Finally, the validity of these models was assessed using estimates of generalization error computed using cross-validation, bootstrap, and predictions of independently sampled data from Study Two. 

\subsection{Methods}
\subsubsection{Participants}
The participants were 188 U.S. residential bill-payers who volunteered to participate through a recruitment advertisement on Amazon's MTurk.  Some participants did not complete all measures, resulting in 142 observations for most statistical analyses.  They were on average 36 years old (range 19--69), 104 (55\%) were women, and their self-reported average monthly bill was \$117 (range \$6--\$420). 

\subsubsection{Materials}
Standard demographics were used, namely age, gender, employment status (full time, part time, unemployed, student, homemaker, retired), education (less than high school, high school/GED, college, associate's degree, 4 year degree, and professional degree), annual household income, race, and political affiliation.\footnote{All materials can be found in~\ref{app:measone}.}

Participants were asked for the total hours they spent in the home during the day and whether they were in the home for each of six 4-hour time periods.  These measures were intended to proxy for the degree to which participants could use the in-home display.  Single-item measures of trust and satisfaction with the utility company were used, along with items asking about ways they may have contacted the utility due to poor services, as we expected trust, satisfaction, and confidence in the utility company would be necessary criteria for customers to consider enrolling in the trial.

Aside from demographics and time constraints, participants may volunteer based on social motivations or interest in the topic.  Social motivations were measured by having participants report their belief about whether they cared more about the environment or did more to reduce electricity use than the average household in their area.  Other motivations to participate, for example the desire to have more reliable service, were adapted from a previous study \cite{krishnamurti2011preparing}.

In terms of topics, the trial could be construed as being about the environment, the in-home display, or about new technology in general.  Interest in the first topic was measured using the New Ecological Paradigm (NEP) \cite{dunlap2000new}, a widely used measure of environmental attitudes.  Interest in the second topic was measured using a four item scale of their attitudes and expectations of the in-home display, such as whether participants expected it to help them save money.  Interest in new technology in general was assessed using two questions about whether participants had heard of in-home displays or smart-meters, and two questions about whether they were eager to buy new or eco-friendly technologies.

Lastly, two questions assessed other volunteering behaviors, asking whether participants had previously volunteered in an energy efficiency program, and whether they would be willing to complete additional questions after the survey.  Several similar questions asked whether participants engaged in volunteer-like civic behaviors, for example using a public library.

\subsection{Results}
\subsubsection{Psychometric Analyses and Univariate Prediction}

The psychometric analyses \cite{devellis2011scale,loehlin2004latent,demars2010item} use Cronbach's alpha \cite{cronbach1951coefficient} and item-total correlations \cite{spector1992summated} as measures of internal consistency (reliability), and Principal Components Analysis \cite{frs1901liii} as a measure of dimensional structure.\footnote{Principal Components Analysis uses Spectral Decomposition \cite{wilkinson1965algebraic} to find the set of eigenvectors for the variance-covariance matrix of the data.  This uses the EISPACK \cite{smith1976matrix} and LAPACK routines (DSYEVR, DGEEV, ZHEEV, ZGEEV, \url{http://www.netlib.org/lapack/lug/node29.html}) or Singular Value Decomposition \cite{anderson1999lapack}.}  To create a short questionnaire that non-volunteers would be inclined to complete, we limit each construct to four questions and use a rule of thumb that Cronbach's alpha must be greater than .7 \cite{nunnally1967psychometric} for an item to be included in a factor model, and elements with the lowest item-total correlations or lower than .4 will be dropped \cite{spector1992summated}.

\newpage

Table~\ref{tab:demo1} shows the demographic characteristics of participants along with their univariate correlations to enrollment intentions.  Consistent with previous research, these types of general demographics were not predictive of intentions to enroll.  There was one exception, as participants who reported being home from 6pm-10pm were more likely to be willing to participate in the trial. 

\begin{table}[hp]
  \caption{Univariate predictions, means, and standard deviations for the demographic items.  $\tau$ is Kendall's measure of rank correlation.}
  \label{tab:demo1}
  \centering
\scalebox{0.87}{
  \begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Mean & SD \\ \hline
Age &              \Sexpr{ztrunc(cor.test(vmoo$age,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo$age,as.numeric(vmoo$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(mean(vmoo$age,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$age,na.rm=TRUE))} \\
Number of adults & \Sexpr{ztrunc(cor.test(vmoo$num.adults,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo$num.adults,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$num.adults,na.rm=TRUE))} & \Sexpr{prettyNum(sd(vmoo$num.adults,na.rm=TRUE))} \\ 
Number of children & \Sexpr{ztrunc(cor.test(vmoo$num.children,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo$num.children,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & 0.70 & \Sexpr{ztrunc(sd(vmoo$num.children,na.rm=TRUE))} \\ %\Sexpr{ztrunc(mean(vmoo$num.children,na.rm=TRUE))} 
& & & \\
Item                   &  $\chi^2$ ($p$)                 & Mean & SD \\ \hline
Gender &     \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$gender,vmoo$enroll)[1]))}        (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$gender,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$gender)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$gender)-1,na.rm=TRUE)))} \\ 
Race (Asian, Black, Hispanic, Native American, White) &       \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$race,vmoo$enroll)[1]))}          (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$race,vmoo$enroll)[3]))}) & &  \\ 
Political (Democrat, Independent, Republican) &  \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$political,vmoo$enroll)[1]))}     (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$political,vmoo$enroll)[3]))}) & &  \\ 
Education &  \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$education,vmoo$enroll)[1]))}     (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$education,vmoo$enroll)[3]))}) & &  \\ 
Employment & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$employment,vmoo$enroll)[1]))}    (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$employment,vmoo$enroll)[3]))}) & &  \\ 
& & & \\
\emph{Time at Home} &  &  &  \\
6am-10am &   \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$six.am.ten.am,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$six.am.ten.am,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$six.am.ten.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$six.am.ten.am)-1,na.rm=TRUE)))}  \\ 
10am-2pm &   \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$ten.am.two.pm,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$ten.am.two.pm,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$ten.am.two.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$ten.am.two.pm)-1,na.rm=TRUE)))} \\ 
2pm-6pm &    \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$two.pm.six.pm,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$two.pm.six.pm,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$two.pm.six.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$two.pm.six.pm)-1,na.rm=TRUE)))} \\ 
6pm-10pm & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$six.pm.ten.pm,vmoo$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$six.pm.ten.pm,vmoo$enroll)[3]))})}$ & \Sexpr{ztrunc(mean(as.numeric(vmoo$six.pm.ten.pm)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$six.pm.ten.pm)-1,na.rm=TRUE)))} \\ 
10pm-2am &   \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$ten.pm.two.am,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$ten.pm.two.am,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$ten.pm.two.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$ten.pm.two.am)-1,na.rm=TRUE)))} \\ 
2am-6am &    \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$two.am.six.am,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$two.am.six.am,vmoo$enroll)[3]))}) & \Sexpr{ztrunc(mean(as.numeric(vmoo$two.am.six.am)-1,na.rm=TRUE))} & \Sexpr{ztrunc(as.numeric(sd(as.numeric(vmoo$two.am.six.am)-1,na.rm=TRUE)))} \\ \hline
\end{tabular}}
\end{table}

\newpage
Table~\ref{tab:facs1} shows the psychometric properties of the two scales.  All of the in-home display expectation items had high item-total correlations  (greater than \Sexpr{ztrunc(it4.IHD1)} \cite{devellis2011scale}), and the principal components analysis shows that one factor accounts for \Sexpr{prettyNum(100*vars.IHD1[1])}\% of the variance, indicating the in-home display expectation items are well fit by a single dimension.  For the New Ecological Paradigm scale (NEP) \cite{dunlap2000new}, all item-total correlations were greater than \Sexpr{ztrunc(it2.NEP1)}.  The principal components analysis shows that one factor accounts for \Sexpr{prettyNum(100*vars.NEP1[1])}\% of the variance, and a second factor accounts for \Sexpr{prettyNum(100*vars.NEP1[2])}\% of the variance, indicating the NEP is not well fit by a uni-dimensional scale.  To reduce the scale to four items, item two (modify environment) and item four (delicate nature) were dropped, as they had the two lowest item-total correlations (\Sexpr{ztrunc(it2.NEP1)} and  \Sexpr{ztrunc(it4.NEP1)}, respectively).  Both the in-home display expectation and NEP factors were positively correlated with intentions to enroll, but the in-home display (IHD) factor was much stronger.

\begin{table}[hp]
  \caption{Univariate predictions, factor loadings and Cronbach's $\alpha$ for the in-home display expectations and NEP scales.  $\tau$ is Kendall's measure of rank correlation.}
  \label{tab:facs1}
  \centering
\scalebox{0.87}{
  \begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Loading & $\alpha$ \\ \hline
\emph{IHD Expectations} &  &  &  \\
I would enjoy having an IHD in my home. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$ihd.enjoy,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$ihd.enjoy,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD1[1,1])} &  \\ 
An IHD would help me save money each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$ihd.save.money,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$ihd.save.money,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$   & \Sexpr{ztrunc(-L.IHD1[2,1])} &  \\ 
An in-home display would help me save electricity each month. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$ihd.save.electricity,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$ihd.save.electricity,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD1[3,1])} &  \\ 
I would learn from an IHD. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$ihd.learn,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$ihd.learn,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(-L.IHD1[4,1])} &  \\ 
IHD Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(frame.fac$IHD1.fac,as.numeric(frame.fac$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(frame.fac$IHD1.fac,as.numeric(frame.fac$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.IHD1[1])}\% & \Sexpr{ztrunc(alpha.ov.IHD1)} \\ 
& & & \\
\emph{NEP} &  &  &  \\
Plants and animals exist primarily to be used by humans. & \Sexpr{ztrunc(cor.test(vmoo$use.plants.animals,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo$use.plants.animals,as.numeric(vmoo$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(-L.NEP1[1,1])} &  \\
Mankind was created to rule over the rest of nature. & \Sexpr{ztrunc(cor.test(vmoo$rule.nature,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo$rule.nature,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP1[2,1])} &  \\ 
There are limits to growth beyond which our industrialized society cannot expand. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$growth.limits,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$growth.limits,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$   & \Sexpr{ztrunc(-L.NEP1[3,1])} &  \\ 
The earth is like a spaceship with only limited room and resources. & \Sexpr{ztrunc(cor.test(vmoo$spaceship.earth,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo$spaceship.earth,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(-L.NEP1[4,1])} &  \\ 
NEP Factor & $\mathbf{\Sexpr{ztrunc(abs(cor.test(frame.fac$NEP1.fac,as.numeric(frame.fac$enroll),method="kendall")$estimate))} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(frame.fac$NEP1.fac,as.numeric(frame.fac$enroll),method="kendall")$statistic))})}$ & \Sexpr{prettyNum(100*vars.NEP1[1])}\% & \Sexpr{ztrunc(alpha.ov.NEP1.red)} \\ \hline
\end{tabular}}
\end{table}

\newpage
Table~\ref{tab:soc1} shows univariate predictions, means, and standard deviations for questionnaire items on social comparisons, messaging, constraints, purchasing, and utility perceptions.  Those who believed they were more caring about the environment and reduced electricity more than the average household were more likely to indicate that they would volunteer for the trial, suggesting the desire or belief that one is more environmental than others may drive enrollment behaviors.  Participants also reported that if participation in the study were conveyed as protecting the environment or reducing waste that they would be more likely to volunteer.  Similarly, interest in new and environmental technologies positively predicted enrollment intentions.  Surprisingly, participants who spent more time in the home were less likely to volunteer, and trust or satisfaction with the utility were unrelated to intentions to volunteer.

\begin{table}[hp]
  \caption{Univariate prediction, means, and standard deviations for the Social Comparisons, Messages, Constraints, Purchases, Utility Perceptions items.  $\tau$ is Kendall's measure of rank correlation.}
  \label{tab:soc1}
  \centering
\scalebox{0.87}{
  \begin{tabular}{p{10cm} c c c}
Item                   &  $\tau$ ($Z$)                 & Mean & SD \\ \hline
\emph{Social Comparisons} &  &  &  \\
My household uses more electricity. & \Sexpr{ztrunc(cor.test(vmoo$more.electriciy,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo$more.electriciy,as.numeric(vmoo$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(mean(vmoo$more.electriciy,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$more.electriciy,na.rm=TRUE))} \\ 
My household has done more to reduce its electricity consumption. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$more.reduce,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$more.reduce,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$more.reduce,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$more.reduce,na.rm=TRUE))} \\ 
My household cares more about the environment. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$care.environment,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$care.environment,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$care.environment,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$care.environment,na.rm=TRUE))} \\ 
& & & \\
\emph{Messages} &  &  &  \\
Protect Environment & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$protect.environment,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$protect.environment,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$protect.environment,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$protect.environment,na.rm=TRUE))} \\ 
Save Money & \Sexpr{ztrunc(cor.test(vmoo$save.money,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo$save.money,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$save.money,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$save.money,na.rm=TRUE))} \\ 
Control & \Sexpr{ztrunc(cor.test(vmoo$control,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo$control,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$control,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$control,na.rm=TRUE))} \\ 
Reliability & \Sexpr{ztrunc(cor.test(vmoo$reliability,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo$reliability,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$reliability,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$reliability,na.rm=TRUE))}\\ 
Energy Independence & \Sexpr{ztrunc(cor.test(vmoo$energy.indeendence,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo$energy.indeendence,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$energy.indeendence,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$energy.indeendence,na.rm=TRUE))} \\ 
More Choices & \Sexpr{ztrunc(cor.test(vmoo$more.choices,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{prettyNum(cor.test(vmoo$more.choices,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$more.choices,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$more.choices,na.rm=TRUE))} \\ 
Avoid Waste & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$avoid.waste,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$avoid.waste,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$avoid.waste,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$avoid.waste,na.rm=TRUE))} \\ 
& & & \\
\emph{Constraints} &  &  &  \\
I feel that I personally have control over how much electricity is consumed in my household. & \Sexpr{ztrunc(cor.test(vmoo$control.consume,as.numeric(vmoo$enroll),method="kendall")$estimate)} (0.02) & \Sexpr{ztrunc(mean(vmoo$control.consume,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$control.consume,na.rm=TRUE))} \\ %(\Sexpr{prettyNum(cor.test(vmoo$control.consume,as.numeric(vmoo$enroll),method="kendall")$statistic)})
Total Hours Home & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$hours.home,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(abs(cor.test(vmoo$hours.home,as.numeric(vmoo$enroll),method="kendall")$statistic))})}$ & \Sexpr{ztrunc(mean(vmoo$hours.home,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$hours.home,na.rm=TRUE))} \\ 
& & & \\
\emph{Purchasing} &  &  &  \\
I am always eager to be the first to buy a new technology. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$buy.new.tech,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$buy.new.tech,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$buy.new.tech,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$buy.new.tech,na.rm=TRUE))} \\ 
When I have a choice between two equal products, I always buy the one that is less harmful to other people and the environment. & $\mathbf{\Sexpr{ztrunc(cor.test(vmoo$eco.purchase,as.numeric(vmoo$enroll),method="kendall")$estimate)} \hspace{3pt} (\Sexpr{ztrunc(cor.test(vmoo$eco.purchase,as.numeric(vmoo$enroll),method="kendall")$statistic)})}$ & \Sexpr{ztrunc(mean(vmoo$eco.purchase,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$eco.purchase,na.rm=TRUE))} \\
& & & \\
\emph{Utility Perceptions} &  &  &  \\
I am satisfied with my electricity company. & \Sexpr{ztrunc(cor.test(vmoo$satisfied.utility,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(cor.test(vmoo$satisfied.utility,as.numeric(vmoo$enroll),method="kendall")$statistic)}) & \Sexpr{ztrunc(mean(vmoo$satisfied.utility,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$satisfied.utility,na.rm=TRUE))} \\ 
I trust my electricity company to do the right thing by their customers. & \Sexpr{ztrunc(cor.test(vmoo$trust.utility,as.numeric(vmoo$enroll),method="kendall")$estimate)} (\Sexpr{ztrunc(abs(cor.test(vmoo$trust.utility,as.numeric(vmoo$enroll),method="kendall")$statistic))}) & \Sexpr{ztrunc(mean(vmoo$trust.utility,na.rm=TRUE))} & \Sexpr{ztrunc(sd(vmoo$trust.utility,na.rm=TRUE))} \\ \hline
\end{tabular}}
\end{table}
 
\newpage
Table~\ref{tab:civ1} shows univariate predictions, means, and standard deviations for items involving civic engagement, previous volunteering, technology awareness and utility contacts.  As can be seen, two energy-efficient purchasing behaviors, buying CFLs and appliances, were both related to volunteering intentions.  The less energy-related behavior of contributing to a 401k fund was also predictive of intentions to enroll.  There were several surprising results, where both the self-reported and the behavioral measure of volunteering did not predict enrollment intentions, which may indicate that general volunteering traits are not very important in this context.  Similar to the utility perception results, utility contacts were unrelated to willingness to volunteer. 

\begin{table}[hp]
  \caption{Univariate predictions, means, and standard deviations for the Civic Engagement, Volunteering, Technology Awareness, and Utility Contacts items.  $\chi^2$ is the chi-squared test statistic for the association between each item and enrollment intentions, with its $p$-value in parentheses.}
  \label{tab:civ1}
  \centering
\scalebox{0.87}{
  \begin{tabular}{p{10cm} c c c}
Item                   &  $\chi^2$ ($p$)                 & Mean & SD \\ \hline
\emph{Civic Engagement} &  &  &  \\
Bought Compact Fluorescent Lights & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$cfl,vmoo$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$cfl,vmoo$enroll)[3]))})}$ & & \\ 
Used an electricity tracking device (e.g., an in-home display) & \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$tracking.device,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$tracking.device,vmoo$enroll)[3]))}) & &  \\ 
Bought one or more energy efficient appliances & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$efficient.appliances,vmoo$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$efficient.appliances,vmoo$enroll)[3]))})}$ & & \\ 
Insulated my home & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$insulated.home,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$insulated.home,vmoo$enroll)[3]))}) & & \\ 
Got a flu shot & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$flu.shot,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$flu.shot,vmoo$enroll)[3]))}) & &  \\ 
Recycled & \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$recycled,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$recycled,vmoo$enroll)[3]))}) & &  \\ 
Contributed to a retirement savings (e.g., 401k) & $\mathbf{\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$four.oh.one.k,vmoo$enroll)[1]))} \hspace{3pt} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$four.oh.one.k,vmoo$enroll)[3]))})}$ & &  \\
Used the public library & \Sexpr{prettyNum(as.numeric(chisq.test(vmoo$library,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$library,vmoo$enroll)[3]))}) & &  \\
Enrolled in prize drawings & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$prize.drawing,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$prize.drawing,vmoo$enroll)[3]))}) & &  \\
Donated time to a charity or non-profit organization. & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$donated.time,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$donated.time,vmoo$enroll)[3]))}) & &  \\
Donated money to a charity or non-profit organization. & 0.05  (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$donated.money,vmoo$enroll)[3]))}) & & \\ %\Sexpr{prettyNum(as.numeric(chisq.test(vmoo$donated.money,vmoo$enroll)[1]))}
Bought a lottery ticket & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$lottery.ticket,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$lottery.ticket,vmoo$enroll)[3]))}) & &  \\
& & & \\
\emph{Volunteering} &  &  &  \\
Have you previously participated in any programs offered by your electric utility company? & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$previous.program,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$previous.program,vmoo$enroll)[3]))}) & &  \\ 
Please help us by filling out the questionnaire on the next few pages about your experience with the survey. & 0.05 (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$continue.survey,vmoo$enroll)[3]))}) & &  \\ %\Sexpr{prettyNum(chisq.test(vmoo$continue.survey,vmoo$enroll)[1])}
& & & \\
\emph{Technology Awareness} &  &  &  \\
Do you currently have a smart meter in your home? & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$have.smart.meter,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$have.smart.meter,vmoo$enroll)[3]))}) & &  \\ 
Have you heard of in-home electricity displays before this survey? & 0.03 (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$heard.of.ihd,vmoo$enroll)[3]))}) & & \\ %\Sexpr{prettyNum(chisq.test(vmoo$heard.of.ihd,vmoo$enroll)[1])}
& & & \\
\emph{Utility Contacts} &  &  &  \\
Blackouts and blackout-related problems & \Sexpr{prettyNum(chisq.test(vmoo$contact.blackouts,vmoo$enroll)[1])} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$contact.blackouts,vmoo$enroll)[3]))}) & & \\ 
Incorrect or confusing billing & \Sexpr{ztrunc(as.numeric(chisq.test(vmoo$contact.billing,vmoo$enroll)[1]))} (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$contact.billing,vmoo$enroll)[3]))}) & & \\ 
Poor provision of electricity services & 0.08 (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$contact.provision,vmoo$enroll)[3]))}) & & \\ %\Sexpr{prettyNum(chisq.test(vmoo$contact.provision,vmoo$enroll)[1])} 
I have never contacted the electricity company & 0.30 (\Sexpr{ztrunc(as.numeric(chisq.test(vmoo$never.contacted,vmoo$enroll)[3]))}) & & \\ \hline %\Sexpr{prettyNum(chisq.test(vmoo$never.contacted,vmoo$enroll)[1])}  
\end{tabular}}
\end{table}

\subsection{Multivariate Prediction of Enrollment Intentions}
\subsubsection{Logistic Regression}
The logistic regression method has no way of searching for good sets of predictors, so we ``helped'' the model by supplying it with only the variables that were significantly related to enrollment intentions in univariate association.  In multiple regression, the only statistically significant predictor was whether participants expected to enjoy the in-home display, $t(\Sexpr{glm1$df.residual})=$ \Sexpr{ztrunc(glm1$coefficients[6]/sqrt(diag(vcov(glm1))[6]))}, $p<$ \Sexpr{ztrunc(as.numeric(2*pt(-abs(glm1$coefficients[6]/sqrt(diag(vcov(glm1))[6])),glm1$df.residual)))}. 

\subsubsection{Classification Tree}
<<treeplot1,echo=false,results=hide,fig=false>>=
# Recode the response as a factor labelled "Refuse"/"Enroll" before the trees
# below are fitted and plotted.  factor() attaches the labels to the sorted
# existing values, so the smallest value becomes "Refuse".
# NOTE(review): assumes enroll holds exactly two distinct values and that the
# lower one means "would not enroll" -- confirm against the data coding.
vmoo$enroll<-factor(vmoo$enroll, labels=c("Refuse", "Enroll"))
# Plot a fitted tree with prp(), colouring every node's box and branch by its
# fitted value (tree$frame$yval) relative to the value in the first row of
# tree$frame (presumably the root node -- confirm for non-rpart objects).
#   tree         : object exposing tree$frame$yval
#   low.is.green : when TRUE the fitted values are negated before colouring
#   ...          : passed through untouched to prp()
# Returns whatever prp() returns.
heat.tree <- function(tree, low.is.green=FALSE, ...) {
  fitted <- tree$frame$yval
  if (low.is.green) {
    fitted <- -fitted
  }
  root <- fitted[1]
  top <- max(fitted)
  bottom <- min(fitted)
  palette99 <- rainbow(99, end=.36)
  # Values above the reference are spread over palette slots 50..99, the
  # remaining values over slots 1..50.
  upper.idx <- (fitted - root) * (99-50) / (top - root) + 50
  lower.idx <- (fitted - bottom) * (50-1) / (root - bottom) + 1
  cols <- palette99[ifelse(fitted > root, upper.idx, lower.idx)]
  prp(tree, branch.col=cols, box.col=cols, ...)
}

# Study One tree: every column of vmoo except the first is offered as a
# candidate predictor of enroll.
# NOTE(review): column 1 is presumably a respondent identifier -- confirm.
rp1 <- rpart(enroll ~ ., data = vmoo[, -1])

# Label callback handed to prp().  It ignores the labels prp() generates and
# returns hand-written ones; only two entries carry text, the rest are blank.
# The vector is hard-coded for the tree fitted above, so it has to be
# re-checked whenever the data or the fitted tree changes.
split.fun <- function(x, labs, digits, varlen, faclen)
{
  # Returned explicitly instead of as the invisible value of an assignment.
  c("", "", "", "Expect enjoyment?", "Expect savings?")
}

# Render the tree to treeplot1.png, the file the manuscript includes as the
# Study One classification tree figure.
png(file = "treeplot1.png", width = 5000, height = 6500, res = 500)
par(xpd = NA)  # do not clip drawing to the plot region
# TRUE is spelled out: T is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved word.
prp(rp1, split.fun = split.fun, extra = 102, under = TRUE, yesno = TRUE,
    clip.right.labs = FALSE, main = "", type = 0, varlen = 0, faclen = 0,
    fallen.leaves = TRUE, branch = 1, branch.lty = 1, gap = 15,
    border.col = 0, split.border.col = 0, xflip = FALSE,
    under.cex = 1.3, split.cex = 1.2, left = FALSE, branch.tweak = 1,
    yshift = 3, split.yshift = 2, nspace = 1, yspace = 0, space = 0,
    split.space = 0, split.yspace = 0)
# Alternatives tried earlier, kept for reference:
#,split.prefix="Is ", split.suffix="?", eq=" ", facsep=" or "
#heat.tree(rp1,split.fun=split.fun,extra=2,under=T,yesno=F,clip.right.labs=FALSE,main="",split.prefix="Is ", split.suffix="?", eq=" ", facsep=" or ")
#type=4
dev.off()

# Alternative Study One tree fitted without columns 1 and 57-60 of vmoo.
# NOTE(review): columns 57-60 are presumably the four in-home display
# expectation items that the manuscript says are removed for this tree, and
# column 1 a respondent identifier -- confirm against the data frame.
rp1.1 <- rpart(enroll ~ ., data = vmoo[, -c(1, 57:60)])

# Label callback handed to prp() for the alternative tree.  It ignores the
# labels prp() generates and returns hand-written ones; the "x<n>" entries are
# placeholders.  The vector is hard-coded for the tree fitted above, so it has
# to be re-checked whenever the data or the fitted tree changes.
split.fun <- function(x, labs, digits, varlen, faclen)
{
  # Returned explicitly instead of as the invisible value of an assignment.
  c("x1", "x2", "x3", "Home between 6pm and 10pm", "x5",
    "Younger than 52 years old.", "x7",
    "Nature is delicate and easily upset", "x9", "Income<5.5",
    "Care about the environment?", "x12", "x13",
    "Buy new technology", "Donated time to a charity")
}

# Render the tree to treeplot1-1.png, the file the manuscript includes as the
# classification tree without in-home display expectation items.
png(file = "treeplot1-1.png", width = 5000, height = 6500, res = 500)
par(xpd = NA)  # do not clip drawing to the plot region
# TRUE is spelled out: T is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved word.
prp(rp1.1, split.fun = split.fun, extra = 102, under = TRUE, yesno = TRUE,
    clip.right.labs = FALSE, main = "", type = 0, varlen = 0, faclen = 0,
    fallen.leaves = TRUE, branch = 1, branch.lty = 1, gap = 15,
    border.col = 0, split.border.col = 0, xflip = FALSE,
    under.cex = 1.3, split.cex = 1.2, left = FALSE, branch.tweak = 1,
    yshift = 3, split.yshift = 2, nspace = 1, yspace = 0, space = 0,
    split.space = 0, split.yspace = 0)
dev.off()
# Splits behind the labels above, as recorded by the author:
#top
#x11 Care environment>3.5
#left branch
#x4 six.ten.pm=yes
#x6 age<52
#x8 delicate nature >2.5
#x10 income <5.5
#right branch
#x15 donated time = yes
#x14 buy new tech>2.5
@ 

Figure~\ref{fig:treeplot1} shows that the Classification Tree discovered that only two variables were necessary to predict enrollment intentions well: whether participants expected to save money when using the in-home display and whether they thought they would enjoy it.  Almost all (81/92) participants who expected to save money ($\ge3.5$ on a 5 point scale) said they would volunteer.  Among those who did not expect to save as much money ($<3.5$ on a 5 point scale), most (32/50) said they would not want to enroll.  However, among those people who did not expect to save, most (9/12) who expected to enjoy having the IHD ($\ge3.5$ on a 5 point scale) would still enroll, whereas most of those (29/38) who neither expected to save money nor enjoy the IHD experience indicated that they would not enroll.

\begin{figure}[p]
\vspace{-10cm}
  \noindent\makebox[\textwidth]{%
    \scalebox{1.8}{\includegraphics{treeplot1}} 
  }  
      \caption{Study One Classification Tree predicting enrollment intentions.}
    \label{fig:treeplot1}
\end{figure}

Figure~\ref{fig:treeplot1-1} removes the in-home display expectation items to allow the Classification Tree algorithm to discover an alternative model.  Moving down the right branch of the tree, those who care about the environment ($\ge3.5$ on a 5 point scale) were very likely to indicate that they would volunteer for the study unless they both had not previously donated time to a charity in the last 12 months and were not interested in buying new technology ($<2.5$ on a 5 point scale).  However, moving down the left branch of the tree, those who did not care about the environment were likely to volunteer only if they were home between 6pm and 10pm, were younger than 52 years old, believed that the balance of nature is delicate and easily upset ($>2.5$ on a 5 point scale), and did not have a very high income ($<5.5$ on a 7 point scale).  If any of these conditions were not met, participants were much less likely to volunteer.

\begin{figure}[p]
\vspace{-10cm}
  \noindent\makebox[\textwidth]{%
    \scalebox{1.8}{\includegraphics{treeplot1-1}} 
  }  
      \caption{Study One Classification Tree predicting enrollment intentions without in-home display expectation items.}
    \label{fig:treeplot1-1}
\end{figure}

\subsubsection{Equal Weights}
The range of equal weights scores was \Sexpr{prettyNum(min(frame$eqsum))} to \Sexpr{prettyNum(max(frame$eqsum))}, with a mean of \Sexpr{prettyNum(mean(frame$eqsum))} and standard deviation \Sexpr{prettyNum(sd(frame$eqsum))}.  The correlation between the equal weighted sum of predictor variables and intentions to enroll was $r=$ \Sexpr{ztrunc(cor(frame$eqsum,as.numeric(frame$enroll),use="complete"))}.

\subsubsection{Performance Comparisons of Classification Methods}
Table~\ref{tab:studyone} shows the performance of the five multivariate classification methods: (from top to bottom) the TTB heuristic (labeled Take the Best), equal weights (Equal Weights), Logistic Regression (Logistic), and Classification Tree with (Class Tree) and without (Class Tree no IHD) in-home display expectation items.\footnote{Logistic regression was also conducted with factor scores, but this did not improve predictive validity on any measure, so we omit it here.}  The predictive performance of each method is divided into \emph{in-sample error}, a measure of inaccuracy based on using the whole data set to train the prediction method, and \emph{generalization error}, a measure of inaccuracy based on using subsets of the data to train the prediction method.\footnote{In-sample error measures were $\phi$, which is the correlation between the method's predictions and actual intentions to enroll, the True Positive Rate (\emph{TPR}, also called Sensitivity), which is the proportion of participants who intended to enroll that were correctly identified, the True Negative Rate (\emph{TNR}, also called Specificity), which is the proportion of participants who did not intend to enroll who were correctly identified, and \emph{Error}, which is the raw misclassification rate, or $1-\text{accuracy}$.  There were also two measures of generalization error, \emph{10-Fold CV}, which is the cross-validation estimate of error by splitting the data into 10 random subsets then predicting one subset from the other nine for all subsets, and the bootstrap method (\emph{Boot}) \cite{efron1993introduction}, where a random sample from the original dataset of equal size (142 observations) is taken with replacement.  The model is fit on this subsample and then used to make predictions for the full dataset.  This process is then repeated \Sexpr{N} times, and the average of the misclassifications across these simulations is the bootstrap estimate of generalization error.}

To place these performance metrics into context, a crude model that uses no information from the predictor variables would predict that all participants enroll and would misclassify \Sexpr{prettyNum(100*43/142)}\% of the participants.  All rules, except equal weights, had better accuracy than this error rate.  The logistic regression performed best on two of the in-sample error metrics, including overall predictive ability ($\phi$) and overall error.  However, as expected, the Logistic regression overfit the data by underestimating generalization error, and thus performed worse than the TTB heuristic on 10-Fold CV and worse than the Classification Tree on the bootstrapped estimate of error.   

\begin{table}[h]
  \begin{minipage}{\linewidth}
  \caption{Study One Performance of Classification Methods.\footnote{$\phi$ is the correlation between the method's prediction and intentions to enroll. \emph{TPR} is the proportion of people who intend to enroll correctly identified.  \emph{TNR} is the proportion of people who do not intend to enroll correctly identified.  \emph{Error} is the proportion of participants incorrectly classified.  \emph{10-Fold CV} is the estimated generalization error rate using 10-fold cross validation.  \emph{Boot} is the bootstrap estimate of generalization error.}}
  \label{tab:studyone}
  \centering
  \begin{tabular}{c c c c c c c}
 & \multicolumn{4}{c}{In-Sample Error} & \multicolumn{2}{c}{Generalization Error} \\
 Method & $\phi$ & TPR & TNR & Error & 10-Fold CV & Boot \\ \hline
Take the Best & \Sexpr{ztrunc((gm2[1]*gm2[4]-gm2[3]*gm2[2])/gmm2)} & $\mathbf{\Sexpr{ztrunc(gm2[4]/(gm2[4]+gm2[2]))}}$ & \Sexpr{ztrunc(gm2[1]/(gm2[1]+gm2[3]))} & \Sexpr{ztrunc((gm2[2]+gm2[3])/142)} & $\mathbf{\Sexpr{ztrunc(glmcv2$delta)}}$ & \Sexpr{ztrunc(mean(ttbmiss)/142)} \\ 
Equal Weights & \Sexpr{ztrunc((eqm[1]*eqm[4]-eqm[3]*eqm[2])/eqq)} & \Sexpr{ztrunc(eqm[4]/(eqm[4]+eqm[2]))} & \Sexpr{ztrunc(eqm[1]/(eqm[1]+eqm[3]))} &  \Sexpr{ztrunc((eqm[2]+eqm[3])/142)} & \Sexpr{ztrunc(glmcv3$delta)} & \Sexpr{ztrunc(mean(miss)/142)} \\
Logistic & $\mathbf{\Sexpr{ztrunc((gm[1]*gm[4]-gm[3]*gm[2])/gmm)}}$ & \Sexpr{ztrunc(gm[4]/(gm[4]+gm[2]))} & \Sexpr{ztrunc(gm[1]/(gm[1]+gm[3]))} & $\mathbf{\Sexpr{ztrunc((gm[2]+gm[3])/142)}}$ & \Sexpr{ztrunc(glmcv$delta)} & \Sexpr{ztrunc(mean(glmiss)/142)} \\ 
Class Tree & \Sexpr{ztrunc((rp1t[1]*rp1t[4]-rp1t[3]*rp1t[2])/rp1tt)} & \Sexpr{ztrunc(rp1t[4]/(rp1t[4]+rp1t[2]))} & \Sexpr{ztrunc(rp1t[1]/(rp1t[1]+rp1t[3]))} &  \Sexpr{ztrunc((rp1t[2]+rp1t[3])/142)} & \Sexpr{ztrunc(mean(fold.cart.miss))} & $\mathbf{\Sexpr{ztrunc(mean(rpmiss)/142)}}$ \\
Class Tree no IHD & \Sexpr{ztrunc((rp1.1t[1]*rp1.1t[4]-rp1.1t[3]*rp1.1t[2])/rp1.1tt)} & \Sexpr{ztrunc(rp1.1t[4]/(rp1.1t[4]+rp1.1t[2]))} & $\mathbf{\Sexpr{ztrunc(rp1.1t[1]/(rp1.1t[1]+rp1.1t[3]))}}$ &  \Sexpr{ztrunc((rp1.1t[2]+rp1.1t[3])/142)} & \Sexpr{ztrunc(mean(fold.cart.miss1))} & \Sexpr{ztrunc(mean(rpmiss1)/142)} \\ \hline  
  %MLE Logistic (F)  & \Sexpr{ztrunc((gm.fac[1]*gm.fac[4]-gm.fac[3]*gm.fac[2])/gmm.fac)} & \Sexpr{ztrunc(gm.fac[4]/(gm.fac[4]+gm.fac[2]))} & \Sexpr{ztrunc(gm.fac[1]/(gm.fac[1]+gm.fac[3]))} & \Sexpr{ztrunc((gm.fac[2]+gm.fac[3])/142)} & $\mathbf{\Sexpr{ztrunc(glmcv.fac$delta)}}$ & $\mathbf{\Sexpr{ztrunc(mean(glmiss.fac)/142)}}$ \\ 
  \end{tabular}
  \end{minipage}
\end{table}

%$
\subsection{Discussion}
Study One set out to address the three problems of using propensity score models to control for volunteer bias: 1) finding the right predictors, 2) combining these predictors statistically, and 3) making the questionnaire short enough to appeal to non-volunteers.  Addressing the first problem, only one or two questions eliciting participants' expectations of the in-home display were sufficient to predict willingness to volunteer.  Once these variables were accounted for there was little or no benefit to predictive power of adding additional variables.  These findings are consistent with work showing that the context of the program that is being offered, such as expectations about whether one will benefit from the trial, play an important role in volunteering.

When we expanded the search for the right predictors by excluding the in-home display expectations, the Classification Tree indicated that environmental concern was the important factor for determining enrollment intentions.  As long as people cared about the environment they were likely to enroll, except for the rare case when participants both did not like volunteering, as indicated by not donating their time to a charity, and were not interested in new technology.  Those not concerned about the environment were unlikely to volunteer unless it was convenient for them (home between 6pm and 10pm), they were younger, had some concern for nature, and were not very wealthy.

Surprisingly, several variables that were expected to be the ``right'' predictors were weakly predictive or unrelated to enrollment intentions.  First, environmental concerns and affinity for volunteering were not nearly as strong predictors as the in-home display expectations.  More surprising was the finding that the behavioral measure of volunteering, where participants were asked to help the researcher by completing an additional questionnaire for no financial incentive, was unrelated to intentions to enroll.  One would expect that an actual instance of volunteering behavior, completing the additional questionnaire, would be strongly related to other volunteering behaviors such as enrolling in an energy efficiency trial.  

One explanation for this surprise is that volunteering for an energy efficiency program may not be viewed as a purely altruistic act, as the program was presented as having tangible (financial) benefits in addition to the more subtle social benefits of improving the environment.  Presenting the program in such a way may create conflict between self-interested and altruistic motivations, leading to ``crowding out'' of the more charitable motivation (cite cite).

The results are somewhat consistent with the propensity score model from the ComEd Energy Smart Pricing Pilot \cite{sbc2006evaluation,sbc2007evaluation}, who found that new major appliance purchases and age were unrelated to enrollment intentions, those who used fans to cool the home and had more people in the household were less likely to enroll, whereas those in single-family detached homes were more likely to enroll.  We did not find that the number of household members mattered, but did replicate the lack of association between age and enrollment intentions.  Appliance purchases, in particular energy efficient appliance purchases such as the CFL, were positively associated with enrollment in our study but unassociated in the ComEd study.

Study One also addressed the second problem of using statistical methods to find the right combination of predictors by comparing two simple heuristics (Take the Best and equal weights) and two sophisticated prediction methods (Logistic regression and Classification Tree).  The TTB heuristic and Classification Tree algorithm easily and automatically found the best performing combination of the ``right'' predictors.  Both the Logistic regression and Classification Tree approach performed similarly, and comparable to TTB, because they both gave the largest weight to the best predictor, the expectation of enjoyment of the in-home display.  This finding is consistent with previous research showing social science contexts involve multiple correlated predictors that are unlikely to perform better than the best predictor.  However, Study One also showed that using the TTB heuristic or Classification Trees is superior to Logistic regression, as they produce better generalization error and automatically perform variable selection.

The results also address the third problem by showing that one can reach optimal predictive accuracy using only two items about expectations of benefit from the technology, and that even without these two items only seven questions were needed to implement the more complex Classification Tree.  Thus, it is feasible to create a simple and short questionnaire that non-volunteers would complete.

An alternative interpretation, however, is that optimal prediction involved so few variables because we were poor at picking measures, as the inability of human judges to use complex models intuitively is well known \cite{dawes1974linear}.  To rule out this possibility, Study Two refines the measures used in Study One and expands the set of predictors to include additional validated scales that would address prediction from multiple independent dimensions.

\footnote{Principal Components Analysis uses Spectral Decomposition \cite{wilkinson1965algebraic} to find the set of eigenvectors for the variance-covariance matrix of the data.  This uses the EISPACK \cite{smith1976matrix} and LAPACK routines (DSYEVR, DGEEV, ZHEEV, ZGEEV, \url{http://www.netlib.org/lapack/lug/node29.html}) or Singular Value Decomposition \cite{anderson1999lapack}.}  

\subsection{Results}
\subsubsection{Test Error}
Table~\ref{tab:twotest} compares the ability of models trained on Study One data to predict enrollment intentions for Study Two.  As can be seen, the test error from Study Two either matches or is better than the in-sample and bootstrapped estimates of generalization error from Study One for TTB, equal weights, and the Classification Tree.  This indicates that these approaches are well-calibrated predictors of volunteering.  On the other hand, the Logistic regression performed the best according to in-sample error in Study One but the cross-validation, bootstrap, and in-sample statistics drastically underestimated generalization error (by \Sexpr{prettyNum(-100*(1-glmiss.test/glmcv$delta))}\%, \Sexpr{prettyNum(-100*(1-glmiss.test/(mean(glmiss)/142)))}\%, and \Sexpr{prettyNum(-100*(1-glmiss.test/((gm[2]+gm[3])/142)))}\%, respectively).\footnote{For TTB, in-sample and bootstrap estimates overestimated generalization error, whereas 10-Fold CV underestimated.  For equal weights, in-sample and bootstrap slightly overestimated generalization error, whereas 10-Fold CV again underestimated.  In contrast, 10-Fold CV and bootstrap \emph{overestimated} generalization error for the Classification Tree, with the in-sample estimate being exactly correct.}

\begin{table}[h]
    \caption{Performance of classification methods trained on Study One data predicting Study Two data.}
    \label{tab:twotest}
  \centering
  \begin{tabular}{c c c c c}
    & Study Two & \multicolumn{3}{c}{Study One} \\ \cline{2-5}
    Method & Test Error & In-Sample & 10-Fold & Boot \\ \hline
    Take The Best & \Sexpr{ztrunc(ttbmiss.test)} & \Sexpr{ztrunc((gm2[2]+gm2[3])/142)} & $\mathbf{\Sexpr{ztrunc(glmcv2$delta)}}$ & \Sexpr{ztrunc(mean(ttbmiss)/142)}   \\ 
    Equal Weights &  \Sexpr{ztrunc(mean(miss1.2)/179)} & \Sexpr{ztrunc(mean(miss1)/142)} & \Sexpr{ztrunc(glmcv3$delta)} & \Sexpr{ztrunc(mean(miss)/142)} \\ 
    Logistic & \Sexpr{ztrunc(glmiss.test)} & $\mathbf{\Sexpr{ztrunc((gm[2]+gm[3])/142)}}$ & \Sexpr{ztrunc(glmcv$delta)} & \Sexpr{ztrunc(mean(glmiss)/142)}  \\
    Class Tree & $\mathbf{\Sexpr{ztrunc(rpmiss.test)}}$ & \Sexpr{ztrunc(22/142)} & \Sexpr{ztrunc(mean(fold.cart.miss))} & $\mathbf{\Sexpr{ztrunc(mean(rpmiss)/142)}}$  \\  \hline
  \end{tabular}
\end{table}

\footnote{Several scales dropped the items with lowest item-total correlations to condense the scale to four items.  The purchases scale had four items (Eco-Friendly, Switch, Appliances, Equal) after removing new technology and throwing away obsolete (\Sexpr{ztrunc(it1.buy)} and \Sexpr{ztrunc(it6.buy)}, respectively); the self-efficacy scale (Complicated, Capable, Problems, Plans) removed one item, get right to work (\Sexpr{ztrunc(it2.eff)}); the exploration scale (??, Challenge, Problems, Words) removed new writers (\Sexpr{ztrunc(it1.ex)}) and looking up new words (\Sexpr{ztrunc(it4.ex)}); the Frugality scale (Care Possessions, Reuse Items, Resist Buying, Wait Purchase) dropped no throw away (\Sexpr{ztrunc(it2.fr)}) and better use resources (\Sexpr{ztrunc(it3.fr)}).  No items were dropped from the socialization scale (Close friends, Talk two weeks, Party/Gathering) or the social comparison scale (More Reduce, Care Environment, Consistent Recycle, Active Community), which had only three and four items, respectively, to begin with.  The trust scale originally consisted of eight items.  We removed the four items with the lowest item-total correlations from the scale (family \Sexpr{ztrunc(it6.trust2)}, friends \Sexpr{ztrunc(it7.trust2)}, federal government \Sexpr{ztrunc(it1.trust2)}, and community \Sexpr{ztrunc(it5.trust2)}).  After removing these four items, the trust scale consisted of four items (Local, Scientists, Utility, Co-Workers).  Trust in scientists and utility were significantly related to volunteer intentions.}


\footnote{It is important to look at in-sample and bootstrap estimates of generalization error for TTB, equal weights, and Classification trees, as these will be conservative, but only bootstrap for Logistic, as this is the weakest underestimation of generalization error.  TTB and equal weights performed roughly equally well on in-sample and bootstrap estimates of generalization error.  These were both inferior to the bootstrap estimate of generalization error for the logistic regression.  However, the best performance was by the classification tree with IHD items, which had only an 11\% generalization error estimate based on the likely accurate in-sample error.} 

\section{Logistic Regression using Maximum Likelihood Estimation}
\label{app:glm}
Bishop chapter 3 \& 4

The traditional approach to classification in econometrics is logistic regression using maximum likelihood estimation, a type of linear probabilistic model.  The logistic maximum likelihood estimation is \cite{bishop2006pattern}:

\begin{align}
p(E|\vec{x}) &= \frac{p(\vec{x}|E)p(E)}{p(\vec{x}|E)p(E)+p(\vec{x}|\neg E)p(\neg E)} \\
&= \frac{1}{1+e^{-a}}=\sigma(a)
\end{align}
where 
\begin{equation}
  a=\ln\left(\frac{p(\vec{x}|E)p(E)}{p(\vec{x}|\neg E)p(\neg E)}\right)
\end{equation}

The inverse of the logistic sigmoid $\sigma(a)$ is the logit function:
\begin{equation}
  a=\ln\left(\frac{\sigma}{1-\sigma}\right)
\end{equation}

If the conditional densities are Gaussian and share a covariance matrix then:
\begin{equation}
  p(\vec{x}|E)=\frac{1}{(2\pi)^{D/2}}\frac{1}{|\Sigma|^{1/2}}e^{-\frac{1}{2}(\vec{x}-\mu_{E})^{T}\Sigma^{-1}(\vec{x}-\mu_{E})}
\end{equation}

The posterior probability of class membership given this conditional density is:
\begin{align}
  p(E|\vec{x}) &= \sigma(\vec{w}^{T}\vec{x}+w_{0}) \\
  \vec{w} &= \Sigma^{-1}(\vec{\mu}_{E}-\vec{\mu}_{\neg E}) \\
  w_{0} &= -\frac{1}{2}\vec{\mu}_{E}^{T}\Sigma^{-1}\vec{\mu}_{E}+\frac{1}{2}\vec{\mu}_{\neg E}^{T}\Sigma^{-1}\vec{\mu}_{\neg E}+\ln\frac{p(E)}{p(\neg E)}
 \end{align}

Our model uses only the predictors that were significantly related to enrollment intention in zero-order correlations, as well as a condensed version using factor scores from the Principal Components Analysis.  

\section{Classification Trees}
\label{app:class}

Bishop 14.4 and Breiman, Friedman, Olshen and Stone, 1984; an introduction to recursive partitioning using the rpart routines \cite{therneau1997introduction}

Classification trees look at all possible splits of the data based on the input vector.  This is practically implemented using greedy optimization \cite{bishop2006pattern}.  One starts at the root node, adding one node at a time.  Which nodes to split, and where they should be split, need to be determined.  This is done by exhaustive search over each variable and split, then repeating for all variables, retaining the one with smallest error.  A tree is then grown until some number of data points are in each leaf node, then pruned back to reach an area with low generalization error.  

The tree involves \cite{breiman1984classification}:
\begin{enumerate}
\item Selection of nodes and their splits
\item Making a node terminal or splitting it
\item Assigning terminal nodes to a class
\end{enumerate}

To do this, a notion of node impurity is used.  When all classes are equally likely in a node, then it has maximum impurity (maximum entropy).  When the classes are separated perfectly by a node, then it has minimum impurity (minimum entropy).  The benefit of any proposed split is measured by the decrease in impurity it provides over no split at all.  Each node is then evaluated at its possible splits to see if it decreases the impurity (entropy), and by how much.  Search all splits for the one that decreases the impurity the most.  Then make that split, and repeat for its nodes \cite{rpartman,breiman1984classification}.

The most important part is the decrease-in-impurity function:
\begin{equation}
  \Delta i(s,t)=i(t)-p(R)i(R)-p(L)i(L)
\end{equation}

Where $i(t)$ is the impurity of the parent node, $p(R)$ is the proportion of cases split into the right side, $p(L)$ is the proportion of cases split to the left side, and $i(R)$ and $i(L)$ are the impurities of the right and left nodes.  The total tree impurity is the sum of the impurity of the terminal nodes multiplied by the probability of reaching each terminal node:

\begin{equation}
  I(T)=\sum_{t} I(t)=\sum_{t}i(t)p(t)
\end{equation}

The approach to choosing a stopping rule is to grow the tree maximally then prune using cross-validation to estimate generalization error. 

The impurity measure is the Shannon Entropy:
\begin{equation}
  i(t)=-\sum_{j}p(j|t)\log_{2}([p(j|t)])
\end{equation}

Problems: Linearly separable data; exclusive or.
Splits are always perpendicular to a coordinate axis (on one variable), rather than oblique, along two variables.  Compare to discriminant analysis, which does well with this type of problem.  Splits can be extended to include linear combinations of the form:
\begin{equation}
  \sum_{m}a_{m}x_{m}\le c
\end{equation}

Trees are robust to outliers and errors because they weight each data point equally, rather than by leverage as in linear least squares.

Pruning involves growing the tree to maximum depth until terminal nodes are pure or contain only identical features.  Pruning a branch cuts all descendants of a node.  The result is a pruned subtree.  Branches are pruned based on the in-sample misclassification rate of the branch.  The complexity of a subtree is the number of terminal nodes in the subtree.  The cost complexity tradeoff is then:
\begin{equation}
  R_{\alpha}(T)=R(T)+\alpha\|\tilde t \|
\end{equation}

Where $\|\tilde t \|$ is the number of terminal nodes in the subtree.  This makes $\alpha$ a tuning parameter, varied and chosen through cross-validation.  The pruning starts at the smallest subtree of the total tree whose in-sample misclassification rate equals that of the total tree.  This is done by starting at terminal nodes and deleting them until the misclassification rate increases.  Doing this for all terminal nodes yields the initial subtree.  Minimal cost-complexity pruning is basically cutting the weakest link.

\section{Study One Materials}
\label{app:measone}

All participants first read the following introduction:

\begin{quote}
Thank you for participating in the Electricity Choices Survey. We would like you to evaluate a program we are considering offering to those living in the Washington DC area. You do not need to live in this area to help us. Please read the document below and judge whether you would want to participate in this program or not if it were offered to you. Then, please finish the rest of the survey. Thank you!
\end{quote}

They then read the recruitment offering:

\begin{quote}
To whom it may concern,

You are receiving this letter because Carnegie Mellon University and the United States Department of Energy need your help to evaluate ways of communicating electricity use information.

You have been chosen to receive a free digital photo frame and one year subscription (to see what it looks like, go to: [link]). You can use this photo frame to upload and stream your digital photos. We would also like you to use and evaluate this photo frame as a means for receiving digital information from your electricity company. On the frame, you will receive information about your electricity use from time to time. You can also upload your own photos to the frame. After a 1-year evaluation period, during which we will ask you to provide feedback on the frame, you can keep the frame if you like it or return it for a \$25 gift certificate.

We will send you four surveys (every three months) over the course of the year asking about the frame and your electricity use. Otherwise, your electricity use will be recorded over the 1-year period in the usual way (remote meter reading).

If you choose to participate, you will receive the digital photo frame 1 week from now. There will be no cost to you and all of your information will remain confidential, as is university and government policy.

Thank you,
Carnegie Mellon University Research Team
\end{quote}

The main dependent variable was their response to the following question:
\begin{quote}
  Would you enroll in this offering if it were available to you? (Y/N)
  \end{quote}

Participants reported standard demographics such as age, gender, employment status (full time, part time, unemployed, student, homemaker, retired), education (less than high school, high school/GED, college, associate's degree, 4 year degree, and professional degree), annual household income, race, and political affiliation.  We also asked if they were the primary billpayer, how many adults and children (under age 18) lived in the home, how many hours they spent in the home per day, and when they are usually in the home with six periods (10am-2pm, 2pm-6pm, 6pm-10pm, 10pm-2am, 2am-6am, 6am-10am).

Next are a series of questions used to predict intentions to volunteer.  Next to each question is a code (e.g., IHD1+) that will be used to reference the item in the text, and the +/- stands for our a priori predicted direction, where + means it will be associated with increased likelihood of enrolling, and - decreased likelihood of enrolling.

They then answered three social comparison questions (strongly disagree to strongly agree):
\begin{quote}
To what extent do you disagree or agree with the following statements?
Compared to the average household in the city ...
\begin{itemize}
\item My household uses more electricity. [more.use+]
\item My household has done more to reduce its electricity consumption. [more.reduce+]
\item My household cares more about the environment. [care.environment+]
\end{itemize}
\end{quote}

This was followed by seven questions asking them about the effectiveness of messages to get them to participate (strongly disagree to strongly agree):
\begin{quote}
  How effective would the following messages be to get you to participate in an electricity efficiency program?
  \begin{itemize}
    \item Protecting the environment
      \item Saving Money [save.money+]
        \item Increasing personal control over energy use [control+]
          \item More reliable service [reliability+]
            \item Increasing independent energy security for the US [energy.independence+]
              \item Providing more electricity choices [more.choices+]
                \item Avoiding wasting energy [avoid.waste+]
                  \end{itemize}
\end{quote}

Next were three questions about control and technology purchases (strongly disagree to strongly agree):
\begin{quote}
  To what extent do you disagree or agree with the following statements?
  \begin{itemize}
    \item I feel that I personally have control over how much electricity is consumed in my household.
      \item I am always eager to be the first to buy a new technology.
        \item When I have a choice between two equal products, I always buy the one that is less harmful to other people and the environment.
                  \end{itemize}
  \end{quote}

Next was the New Ecological Paradigm scale.  They were asked:
\begin{quote}
  To what extent do you disagree or agree with the following statements (strongly disagree to strongly agree):
\begin{itemize}
\item The balance of nature is very delicate and easily upset. [delicate.nature+]
  \item Humans have the right to modify the natural environment to suit their needs. [modify.environment-]
    \item Mankind was created to rule over the rest of nature. [rule.nature-]
      \item Plants and animals exist primarily to be used by humans. [use.plants.animals-]
        \item The earth is like a spaceship with only limited room and resources. [spaceship.earth+]
          \item There are limits to growth beyond which our industrialized society cannot expand. [growth.limits+]
\end{itemize}
\end{quote}

They were then asked to report their average monthly electricity bill, and two questions about their perceptions of their utility company:
\begin{quote}
  To what extent do you agree or disagree with the following statements (strongly disagree to strongly agree)?
  \begin{itemize}
\item I am satisfied with my electricity company. [satisfied.utility+]
  \item I trust my electricity company to do the right thing by their customers. [trust.utility+]
    \end{itemize}
  \end{quote}

They were asked about contacting the electricity company:
\begin{quote}
  Have you ever contacted your electricity company about any of the following (please check all that apply):
  \begin{itemize}
    \item Blackouts and blackout-related problems [contact.blackouts-]
      \item Incorrect or confusing billing [contact.billing-]
        \item Poor provision of electricity services [contact.provision-]
          \item I have never contacted the electricity company [never.contact+]
            \item Other reason. [other.contact-] 
            \end{itemize}
  \end{quote}
They were then asked if they had ever participated in any electricity programs:
\begin{quote}
  Have you previously participated in any programs offered by your electric utility company? (Yes/No/Not Applicable) [previous.program+]
  \end{quote}

They were then asked a series of questions about programs that they might have volunteered for:
\begin{quote}
  In the past 12 months have you done any of the following? (Yes/No/Not applicable)
  \begin{itemize}
    \item Bought Compact Fluorescent Lights (CFLs) [cfl+]
      \item Used an electricity tracking device (e.g., an in-home display) [tracking.device+]
        \item Bought one or more energy efficient appliances [efficient.appliances+]
          \item Insulated my home [insulated.home+]
            \item Got a flu shot [flu.shot+]
              \item Recycled [recycled+]
                \item Contributed to a retirement savings (e.g., 401k) [401k+]
                  \item Used the public library [library+]
                    \item Enrolled in prize drawings [prize.drawings+]
                      \item Donated time to a charity or non-profit organization [donated.time+]
                        \item Donated money to a charity or non-profit organization [donated.money+]
                          \item Bought a lottery ticket [lottery.ticket+]
                            \end{itemize}
  \end{quote}

They were then asked several questions about their awareness of technology:
\begin{quote}
  Do you currently have a smart meter in your home? (Yes/No/Don't know) [have.meter+] \\
  Have you heard of in-home electricity displays before this survey? (Yes/No/Don't know) [heard.ihd+] \\
  To what extent do you agree or disagree with the following statement? (strongly disagree to strongly agree)
\begin{enumerate}
\item An in-home display would help me save electricity each month. [ihd.save.electricity+]
  \item An in-home display would help me save money each month. [ihd.save.money+] 
   \item I would enjoy having an in-home display in my home. [ihd.enjoy+] 
     \item I would learn a lot from an in-home display. [ihd.learn+]
\end{enumerate}

\end{quote}

Before the survey ended, we included a behavioral measure of volunteering, where they could continue filling out additional questions to help us out:
\begin{quote}
  Once you've done this you will receive payment.  However, we'd also like feedback on the survey.  Please help us by filling out the questionnaire on the next few pages about your experience with the survey. (continue/ No thanks, I'm done) [continue.survey+]
  \end{quote}




